comparison lifelines_tool/lifelineskmcph.xml @ 2:dd5e65893cb8 draft default tip

add survival and collapsed life table outputs suggested by Wolfgang
author fubar
date Thu, 10 Aug 2023 22:52:45 +0000
parents 232b874046a7
children
comparison
equal deleted inserted replaced
1:232b874046a7 2:dd5e65893cb8
1 <tool name="lifelineskmcph" id="lifelineskmcph" version="0.01"> 1 <tool name="lifelineskmcph" id="lifelineskmcph" version="0.01">
2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> 2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
3 <!--Created by toolfactory@galaxy.org at 10/08/2023 15:48:43 using the Galaxy Tool Factory.--> 3 <!--Created by toolfactory@galaxy.org at 10/08/2023 21:59:53 using the Galaxy Tool Factory.-->
4 <description>Lifelines KM and optional Cox PH models</description> 4 <description>Lifelines KM and optional Cox PH models</description>
5 <requirements> 5 <requirements>
6 <requirement version="1.5.3" type="package">pandas</requirement> 6 <requirement version="1.5.3" type="package">pandas</requirement>
7 <requirement version="3.7.2" type="package">matplotlib</requirement> 7 <requirement version="3.7.2" type="package">matplotlib</requirement>
8 <requirement version="0.27.7" type="package">lifelines</requirement> 8 <requirement version="0.27.7" type="package">lifelines</requirement>
102 if colsok: 102 if colsok:
103 df.columns = testcols # use actual header 103 df.columns = testcols # use actual header
104 else: 104 else:
105 colsok = (args.time in defaultcols) and (args.status in defaultcols) 105 colsok = (args.time in defaultcols) and (args.status in defaultcols)
106 if colsok: 106 if colsok:
107 sys.stderr.write('replacing first row of data derived header %s with %s' % (testcols, defaultcols)) 107 print('Replacing first row of data derived header %s with %s' % (testcols, defaultcols))
108 df.columns = defaultcols 108 df.columns = defaultcols
109 else: 109 else:
110 sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and status %s do not match anything in the file header, supplied header or automatic default column names %s' % (args.time, args.status, defaultcols)) 110 sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and status %s do not match anything in the file header, supplied header or automatic default column names %s' % (args.time, args.status, defaultcols))
111 print('## Lifelines tool starting.\nUsing data header =', df.columns, 'time column =', args.time, 'status column =', args.status) 111 print('## Lifelines tool\nInput data header =', df.columns, 'time column =', args.time, 'status column =', args.status)
112 os.makedirs(args.image_dir, exist_ok=True) 112 os.makedirs(args.image_dir, exist_ok=True)
113 fig, ax = plt.subplots() 113 fig, ax = plt.subplots()
114 if args.group > '': 114 if args.group > '':
115 names = [] 115 names = []
116 times = [] 116 times = []
134 kmf.fit(df[args.time], df[args.status]) 134 kmf.fit(df[args.time], df[args.status])
135 kmf.plot_survival_function(ax=ax) 135 kmf.plot_survival_function(ax=ax)
136 ax.set_title(args.title) 136 ax.set_title(args.title)
137 fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title)) 137 fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title))
138 print('#### No grouping variable, so no log rank or other Kaplan-Meier statistical output is available') 138 print('#### No grouping variable, so no log rank or other Kaplan-Meier statistical output is available')
139 survdf = lifelines.utils.survival_table_from_events(df[args.time], df[args.status])
140 lifedf = lifelines.utils.survival_table_from_events(df[args.time], df[args.status], collapse=True)
141 print("#### Survival table using time %s and event %s" % (args.time, args.status))
142 with pd.option_context('display.max_rows', None,
143 'display.max_columns', None,
144 'display.precision', 3,
145 ):
146 print(survdf)
147 print("#### Life table using time %s and event %s" % (args.time, args.status))
148 with pd.option_context('display.max_rows', None,
149 'display.max_columns', None,
150 'display.precision', 3,
151 ):
152 print(lifedf)
153 outpath = os.path.join(args.image_dir,'survival_table.tabular')
154 survdf.to_csv(outpath, sep='\t')
155 outpath = os.path.join(args.image_dir,'life_table.tabular')
156 lifedf.to_csv(outpath, sep='\t')
139 if len(args.cphcols) > 0: 157 if len(args.cphcols) > 0:
140 fig, ax = plt.subplots() 158 fig, ax = plt.subplots()
141 ax.set_title('Cox-PH model: %s' % args.title) 159 ax.set_title('Cox-PH model: %s' % args.title)
142 cphcols = args.cphcols.strip().split(',') 160 cphcols = args.cphcols.strip().split(',')
143 cphcols = [x.strip() for x in cphcols] 161 cphcols = [x.strip() for x in cphcols]
151 cphdf = df[cutcphcols] 169 cphdf = df[cutcphcols]
152 ucolcounts = colsdf.nunique(axis=0) 170 ucolcounts = colsdf.nunique(axis=0)
153 cph.fit(cphdf, duration_col=args.time, event_col=args.status) 171 cph.fit(cphdf, duration_col=args.time, event_col=args.status)
154 cph.print_summary() 172 cph.print_summary()
155 for i, cov in enumerate(colsdf.columns): 173 for i, cov in enumerate(colsdf.columns):
156 if ucolcounts[i] > 10: 174 if ucolcounts[i] > 10: # a hack - assume categorical columns have at most 10 distinct values; anything with more is treated as continuous and plotted at the QVALS quantiles instead
157 v = pd.Series.tolist(cphdf[cov].quantile(QVALS)) 175 v = pd.Series.tolist(cphdf[cov].quantile(QVALS))
158 vdt = df.dtypes[cov] 176 vdt = df.dtypes[cov]
159 if vdt == 'int64': 177 if vdt == 'int64':
160 v = trimlegend(v) 178 v = trimlegend(v)
161 axp = cph.plot_partial_effects_on_outcome(cov, cmap='coolwarm', values=v) 179 axp = cph.plot_partial_effects_on_outcome(cov, cmap='coolwarm', values=v)