Mercurial > repos > fubar > lifelines_km_cph_tool
diff lifelines_tool/lifelineskmcph.xml @ 2:dd5e65893cb8 draft default tip
add survival and collapsed life table outputs suggested by Wolfgang
author | fubar |
---|---|
date | Thu, 10 Aug 2023 22:52:45 +0000 |
parents | 232b874046a7 |
children |
line wrap: on
line diff
--- a/lifelines_tool/lifelineskmcph.xml Thu Aug 10 07:15:22 2023 +0000
+++ b/lifelines_tool/lifelineskmcph.xml Thu Aug 10 22:52:45 2023 +0000
@@ -1,6 +1,6 @@
 <tool name="lifelineskmcph" id="lifelineskmcph" version="0.01">
   <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
-  <!--Created by toolfactory@galaxy.org at 10/08/2023 15:48:43 using the Galaxy Tool Factory.-->
+  <!--Created by toolfactory@galaxy.org at 10/08/2023 21:59:53 using the Galaxy Tool Factory.-->
   <description>Lifelines KM and optional Cox PH models</description>
   <requirements>
     <requirement version="1.5.3" type="package">pandas</requirement>
@@ -104,11 +104,11 @@
     else:
         colsok = (args.time in defaultcols) and (args.status in defaultcols)
         if colsok:
-            sys.stderr.write('replacing first row of data derived header %s with %s' % (testcols, defaultcols))
+            print('Replacing first row of data derived header %s with %s' % (testcols, defaultcols))
             df.columns = defaultcols
         else:
             sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and status %s do not match anything in the file header, supplied header or automatic default column names %s' % (args.time, args.status, defaultcols))
-print('## Lifelines tool starting.\nUsing data header =', df.columns, 'time column =', args.time, 'status column =', args.status)
+print('## Lifelines tool\nInput data header =', df.columns, 'time column =', args.time, 'status column =', args.status)
 os.makedirs(args.image_dir, exist_ok=True)
 fig, ax = plt.subplots()
 if args.group > '':
@@ -136,6 +136,24 @@
     ax.set_title(args.title)
     fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title))
     print('#### No grouping variable, so no log rank or other Kaplan-Meier statistical output is available')
+survdf = lifelines.utils.survival_table_from_events(df[args.time], df[args.status])
+lifedf = lifelines.utils.survival_table_from_events(df[args.time], df[args.status], collapse=True)
+print("#### Survival table using time %s and event %s" % (args.time, args.status))
+with pd.option_context('display.max_rows', None,
+                       'display.max_columns', None,
+                       'display.precision', 3,
+                       ):
+    print(survdf)
+print("#### Life table using time %s and event %s" % (args.time, args.status))
+with pd.option_context('display.max_rows', None,
+                       'display.max_columns', None,
+                       'display.precision', 3,
+                       ):
+    print(lifedf)
+outpath = os.path.join(args.image_dir,'survival_table.tabular')
+survdf.to_csv(outpath, sep='\t')
+outpath = os.path.join(args.image_dir,'life_table.tabular')
+lifedf.to_csv(outpath, sep='\t')
 if len(args.cphcols) > 0:
     fig, ax = plt.subplots()
     ax.set_title('Cox-PH model: %s' % args.title)
@@ -153,7 +171,7 @@
     cph.fit(cphdf, duration_col=args.time, event_col=args.status)
     cph.print_summary()
     for i, cov in enumerate(colsdf.columns):
-        if ucolcounts[i] > 10:
+        if ucolcounts[i] > 10: # a hack - assume categories are sparse - if not imaginary quintiles will have to do
             v = pd.Series.tolist(cphdf[cov].quantile(QVALS))
             vdt = df.dtypes[cov]
             if vdt == 'int64':