Mercurial > repos > pieterlukasse > prims_metabolomics
changeset 26:637830ac8bcd
added validation in metexp to tabular tool; added workaround/fix for L and D compound types
author | pieter.lukasse@wur.nl |
---|---|
date | Thu, 24 Apr 2014 11:28:38 +0200 |
parents | ab7f9ec70ffc |
children | ecd3f6c9e606 |
files | MsClust.jar export_to_metexp_tabular.xml rankfilter_GCMS/pdfread.py rankfilter_GCMS/test/test_pdfread.py test/test_query_mass_repos.py |
diffstat | 5 files changed, 33 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/export_to_metexp_tabular.xml Fri Apr 04 10:25:19 2014 +0200 +++ b/export_to_metexp_tabular.xml Thu Apr 24 11:28:38 2014 +0200 @@ -1,6 +1,6 @@ <tool id="export_to_metexp_tabular" name="METEXP - Tabular file" - version="0.1.0"> + version="0.2.0"> <description>Create tabular file for loading into METabolomics EXPlorer database</description> <command interpreter="python"> export_to_metexp_tabular.py $rankfilter_and_caslookup_combi $msclust_quant_file $output_result @@ -15,22 +15,33 @@ <param name="organism" type="text" size="80" label="Organism(s) info" - help="Metadata information to accompany the results when stored in MetExp DB." /> + help="Metadata information to accompany the results when stored in MetExp DB." > + <validator type="empty_field" message="A value is required."></validator><!-- attribute optional="False" does not seem to work for params so validator is added --> + </param> + <param name="tissue" type="text" size="80" label="Tissue(s) info" - help="Metadata information to accompany the results when stored in MetExp DB." /> + help="Metadata information to accompany the results when stored in MetExp DB." > + <validator type="empty_field" message="A value is required."></validator> + </param> <param name="experiment_name" type="text" size="80" label="Experiment name/code" - help="Name or code to store the results under. This can help you find the results back in MetExpDB." /> + help="Name or code to store the results under. This can help you find the results back in MetExpDB." > + <validator type="empty_field" message="A value is required."></validator> + </param> <param name="user_name" type="text" size="80" label="User name" - help="User name or code to store the results under. This can help you find the results back in MetExpDB." /> + help="User name or code to store the results under. This can help you find the results back in MetExpDB." 
> + <validator type="empty_field" message="A value is required."></validator> + </param> <param name="column_type" type="text" size="80" label="Column type" - help="Column type to report with the results. This can help you find the results back in MetExpDB." /> + help="Column type to report with the results. This can help you find the results back in MetExpDB." > + <validator type="empty_field" message="A value is required."></validator> + </param> </inputs> <outputs>
--- a/rankfilter_GCMS/pdfread.py Fri Apr 04 10:25:19 2014 +0200 +++ b/rankfilter_GCMS/pdfread.py Thu Apr 24 11:28:38 2014 +0200 @@ -52,8 +52,9 @@ for line in hit_list: line = line.strip().translate(None, '\r') if line != '': - hits = line.replace('\n', ' ').replace('\x0c', '').replace('^L', '').split('Hit') - + hits = line.replace('\n', ' ').replace('\x0c', '').replace('^L', '').split('Hit') #solution? : if we wouldn't replace the \n by ' ' but by some special sign, then reading formula would be simpler! + #strange....code seems fine actually...debug! See test/data/download.pdf + # strange thing is that it looks like the new line does not end up in the text file, even though it looks like there is a new line in the pdf...perhaps a bug in the pdf2text command in linux? spec_id = hits.pop(0).split(' ')[1] j = 0 for hh in hits: @@ -69,8 +70,13 @@ name_tmp = ':'.join(cell[0].split(':')[1:]) else: name_tmp = cell[0].split(':')[1] + + # ugly workaround for the cases where there ends up to be no space between the name and the formula: exhaustive + # replaces of known cases by the same with a white space: name_tmp = name_tmp.replace('lC', 'l C').replace(']C', '] C').replace('sC', 's C').replace('9C', '9 C').replace('.C', '. C') name_tmp = name_tmp.replace(')C', ') C').replace('eC', 'e C').replace('yC', 'y C').replace('oC', 'o C').replace('-C', '- C').replace('dC', 'd C').replace('rC', 'r C') + name_tmp = name_tmp.replace('-, LC', '-, L C').replace('-, DC', '-, D C') + name.append((' '.join(name_tmp.split(' ')[0:len(name_tmp) - 1])).replace(" ", " ")) if name_tmp: if name_tmp.split(' ')[-1][0] == 'C' or name_tmp.split(' ')[-1][0] == 'F' or name_tmp.split(' ')[-1][0] == 'H':
--- a/rankfilter_GCMS/test/test_pdfread.py Fri Apr 04 10:25:19 2014 +0200 +++ b/rankfilter_GCMS/test/test_pdfread.py Thu Apr 24 11:28:38 2014 +0200 @@ -24,6 +24,13 @@ '18495-0.142537-21284-2.26544e+07-135', '22.6544', ' 714')) self.failUnless(expected_element in data) self.failUnless(len(hitlist_missed) != 0) + ''' + Check for last (dummy) hit: + Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, LC21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062. + ''' + expected_element = set(['C21H52O6Si5', ' 30645-02-4', ' mainlib', '15.6', ' (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', ' 658', '12.9014', '37062']) + self.failUnless(expected_element in data) + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_getPDF']
--- a/test/test_query_mass_repos.py Fri Apr 04 10:25:19 2014 +0200 +++ b/test/test_query_mass_repos.py Thu Apr 24 11:28:38 2014 +0200 @@ -31,7 +31,7 @@ input_file = resource_filename(__name__, "data/service_query_tabular.txt") - molecular_mass_col = "MM" + molecular_mass_col = "mass (Da)" dblink_file = resource_filename(__name__, "data/MFSearcher ExactMassDB service.txt") output_result = resource_filename(__name__, outdir + "metexp_query_results_added.txt")