diff lib/hmdb.pm @ 4:6091a80df951 draft

planemo upload commit e2dfb6f7afeeb7767ef83e9e2c3a2d3e6a94ae98
author fgiacomoni
date Fri, 19 Jan 2018 09:38:17 -0500
parents 6d0a0f8f672a
children 7ed37640fe2a
line wrap: on
line diff
--- a/lib/hmdb.pm	Mon Feb 20 09:55:54 2017 -0500
+++ b/lib/hmdb.pm	Fri Jan 19 09:38:17 2018 -0500
@@ -271,23 +271,46 @@
 #    print Dumper $csv ;
     
     foreach my $line (@{$csv}) {
-    	
-    	if ($line !~ /query_mass,compound_id,formula,compound_mass,adduct,adduct_type,adduct_mass,delta/) {
-    		my @entry = split(/,/, $line) ;
-    		
-    		if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
+    	## Old HMDB csv output format - [DEPRECATED]: previous parser kept below, commented out, for reference
+#    	if ($line !~ /query_mass,compound_id,formula,compound_mass,adduct,adduct_type,adduct_mass,delta/) {
+#    		my @entry = split(/,/, $line) ;
+#    		
+#    		if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
+#    		
+#    		$features{ENTRY_ENTRY_ID} = $entry[1] ;
+#    		$features{ENTRY_FORMULA} = $entry[2] ;
+#    		$features{ENTRY_CPD_MZ} = $entry[3] ;
+#    		$features{ENTRY_ADDUCT} = $entry[4] ;
+#    		$features{ENTRY_ADDUCT_TYPE} = $entry[5] ;
+#    		$features{ENTRY_ADDUCT_MZ} = $entry[6] ;
+#    		$features{ENTRY_DELTA} = $entry[7] ;
+#    		
+#    		my %temp = %features ;
+#    		
+#    		push (@{$result_by_entry{$entry[0]} }, \%temp) ;
+#    	}
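+    	## New HMDB csv format V4.0 (Dec 2017) - columns: query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,delta(ppm)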
+		if ($line !~ /query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,delta\(ppm\)/) {
     		
-    		$features{ENTRY_ENTRY_ID} = $entry[1] ;
-    		$features{ENTRY_FORMULA} = $entry[2] ;
-    		$features{ENTRY_CPD_MZ} = $entry[3] ;
-    		$features{ENTRY_ADDUCT} = $entry[4] ;
-    		$features{ENTRY_ADDUCT_TYPE} = $entry[5] ;
-    		$features{ENTRY_ADDUCT_MZ} = $entry[6] ;
-    		$features{ENTRY_DELTA} = $entry[7] ;
-    		
-    		my %temp = %features ;
-    		
-    		push (@{$result_by_entry{$entry[0]} }, \%temp) ;
+    		if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/  ) {
+    			my @entry = ("$1","$2","$3","$4","$5","$6","$7","$8","$9","$10") ;
+
+    			if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
+    			    		
+	    		$features{ENTRY_ENTRY_ID} = $entry[1] ;
+	    		$features{ENTRY_FORMULA} = $entry[4] ;
+	    		$features{ENTRY_CPD_MZ} = $entry[5] ;
+	    		$features{ENTRY_ADDUCT} = $entry[6] ;
+	    		$features{ENTRY_ADDUCT_TYPE} = $entry[7] ;
+	    		$features{ENTRY_ADDUCT_MZ} = $entry[8] ;
+	    		$features{ENTRY_DELTA} = $entry[9] ;
+	    		
+	    		my %temp = %features ;
+	    		push (@{$result_by_entry{$entry[0]} }, \%temp) ;
+    		}
+    		else {
+    			warn "The parsed line ($line) does not match your pattern\n " ;
+    		}
     	}
     	else {
     		next ;