Mercurial > repos > fubar > lifelines_km_cph_tool
comparison lifelines_tool/lifelineskmcph.xml @ 2:dd5e65893cb8 draft default tip
add survival and collapsed life table outputs suggested by Wolfgang
author | fubar |
---|---|
date | Thu, 10 Aug 2023 22:52:45 +0000 |
parents | 232b874046a7 |
children |
comparison
equal
deleted
inserted
replaced
1:232b874046a7 | 2:dd5e65893cb8 |
---|---|
1 <tool name="lifelineskmcph" id="lifelineskmcph" version="0.01"> | 1 <tool name="lifelineskmcph" id="lifelineskmcph" version="0.01"> |
2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> | 2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> |
3 <!--Created by toolfactory@galaxy.org at 10/08/2023 15:48:43 using the Galaxy Tool Factory.--> | 3 <!--Created by toolfactory@galaxy.org at 10/08/2023 21:59:53 using the Galaxy Tool Factory.--> |
4 <description>Lifelines KM and optional Cox PH models</description> | 4 <description>Lifelines KM and optional Cox PH models</description> |
5 <requirements> | 5 <requirements> |
6 <requirement version="1.5.3" type="package">pandas</requirement> | 6 <requirement version="1.5.3" type="package">pandas</requirement> |
7 <requirement version="3.7.2" type="package">matplotlib</requirement> | 7 <requirement version="3.7.2" type="package">matplotlib</requirement> |
8 <requirement version="0.27.7" type="package">lifelines</requirement> | 8 <requirement version="0.27.7" type="package">lifelines</requirement> |
102 if colsok: | 102 if colsok: |
103 df.columns = testcols # use actual header | 103 df.columns = testcols # use actual header |
104 else: | 104 else: |
105 colsok = (args.time in defaultcols) and (args.status in defaultcols) | 105 colsok = (args.time in defaultcols) and (args.status in defaultcols) |
106 if colsok: | 106 if colsok: |
107 sys.stderr.write('replacing first row of data derived header %s with %s' % (testcols, defaultcols)) | 107 print('Replacing first row of data derived header %s with %s' % (testcols, defaultcols)) |
108 df.columns = defaultcols | 108 df.columns = defaultcols |
109 else: | 109 else: |
110 sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and status %s do not match anything in the file header, supplied header or automatic default column names %s' % (args.time, args.status, defaultcols)) | 110 sys.stderr.write('## CRITICAL USAGE ERROR (not a bug!): time %s and status %s do not match anything in the file header, supplied header or automatic default column names %s' % (args.time, args.status, defaultcols)) |
111 print('## Lifelines tool starting.\nUsing data header =', df.columns, 'time column =', args.time, 'status column =', args.status) | 111 print('## Lifelines tool\nInput data header =', df.columns, 'time column =', args.time, 'status column =', args.status) |
112 os.makedirs(args.image_dir, exist_ok=True) | 112 os.makedirs(args.image_dir, exist_ok=True) |
113 fig, ax = plt.subplots() | 113 fig, ax = plt.subplots() |
114 if args.group > '': | 114 if args.group > '': |
115 names = [] | 115 names = [] |
116 times = [] | 116 times = [] |
134 kmf.fit(df[args.time], df[args.status]) | 134 kmf.fit(df[args.time], df[args.status]) |
135 kmf.plot_survival_function(ax=ax) | 135 kmf.plot_survival_function(ax=ax) |
136 ax.set_title(args.title) | 136 ax.set_title(args.title) |
137 fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title)) | 137 fig.savefig(os.path.join(args.image_dir,'KM_%s.png' % args.title)) |
138 print('#### No grouping variable, so no log rank or other Kaplan-Meier statistical output is available') | 138 print('#### No grouping variable, so no log rank or other Kaplan-Meier statistical output is available') |
139 survdf = lifelines.utils.survival_table_from_events(df[args.time], df[args.status]) | |
140 lifedf = lifelines.utils.survival_table_from_events(df[args.time], df[args.status], collapse=True) | |
141 print("#### Survival table using time %s and event %s" % (args.time, args.status)) | |
142 with pd.option_context('display.max_rows', None, | |
143 'display.max_columns', None, | |
144 'display.precision', 3, | |
145 ): | |
146 print(survdf) | |
147 print("#### Life table using time %s and event %s" % (args.time, args.status)) | |
148 with pd.option_context('display.max_rows', None, | |
149 'display.max_columns', None, | |
150 'display.precision', 3, | |
151 ): | |
152 print(lifedf) | |
153 outpath = os.path.join(args.image_dir,'survival_table.tabular') | |
154 survdf.to_csv(outpath, sep='\t') | |
155 outpath = os.path.join(args.image_dir,'life_table.tabular') | |
156 lifedf.to_csv(outpath, sep='\t') | |
139 if len(args.cphcols) > 0: | 157 if len(args.cphcols) > 0: |
140 fig, ax = plt.subplots() | 158 fig, ax = plt.subplots() |
141 ax.set_title('Cox-PH model: %s' % args.title) | 159 ax.set_title('Cox-PH model: %s' % args.title) |
142 cphcols = args.cphcols.strip().split(',') | 160 cphcols = args.cphcols.strip().split(',') |
143 cphcols = [x.strip() for x in cphcols] | 161 cphcols = [x.strip() for x in cphcols] |
151 cphdf = df[cutcphcols] | 169 cphdf = df[cutcphcols] |
152 ucolcounts = colsdf.nunique(axis=0) | 170 ucolcounts = colsdf.nunique(axis=0) |
153 cph.fit(cphdf, duration_col=args.time, event_col=args.status) | 171 cph.fit(cphdf, duration_col=args.time, event_col=args.status) |
154 cph.print_summary() | 172 cph.print_summary() |
155 for i, cov in enumerate(colsdf.columns): | 173 for i, cov in enumerate(colsdf.columns): |
156 if ucolcounts[i] > 10: | 174 if ucolcounts[i] > 10: # a hack - assume categories are sparse - if not imaginary quintiles will have to do |
157 v = pd.Series.tolist(cphdf[cov].quantile(QVALS)) | 175 v = pd.Series.tolist(cphdf[cov].quantile(QVALS)) |
158 vdt = df.dtypes[cov] | 176 vdt = df.dtypes[cov] |
159 if vdt == 'int64': | 177 if vdt == 'int64': |
160 v = trimlegend(v) | 178 v = trimlegend(v) |
161 axp = cph.plot_partial_effects_on_outcome(cov, cmap='coolwarm', values=v) | 179 axp = cph.plot_partial_effects_on_outcome(cov, cmap='coolwarm', values=v) |