<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Bioinform Biotech</journal-id><journal-id journal-id-type="publisher-id">bioinform</journal-id><journal-id journal-id-type="index">19</journal-id><journal-title>JMIR Bioinformatics and Biotechnology</journal-title><abbrev-journal-title>JMIR Bioinform Biotech</abbrev-journal-title><issn pub-type="epub">2563-3570</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v7i1e75678</article-id><article-id pub-id-type="doi">10.2196/75678</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Random Survival Forest Versus Elastic-Net Regularized Cox Regression for Survival Prediction in Acute Myeloid Leukemia at Distinct Treatment Time Points: Model Performance Comparison Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Brady</surname><given-names>Ois&#x00ED;n</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Johnson</surname><given-names>Sean</given-names></name><degrees>DPhil</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Giles</surname><given-names>Peter</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Alvares</surname><given-names>Caroline</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zabkiewicz</surname><given-names>Joanna</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Fuentes</surname><given-names>Carolina</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>School of Computer Science and Informatics, Cardiff University</institution><addr-line>Abacws, Senghennydd Road</addr-line><addr-line>Cardiff</addr-line><country>United Kingdom</country></aff><aff id="aff2"><institution>School of Medicine, Cardiff University</institution><addr-line>Cardiff</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Uzun</surname><given-names>Ece</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Yan</surname><given-names>Huihuang</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Calsavara</surname><given-names>Vinicius F</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Ois&#x00ED;n Brady, BSc, School of Computer Science and Informatics, Cardiff University, Abacws, Senghennydd Road, Cardiff, CF24 4AG, United Kingdom, 44 (0)29 2087 4812; <email>bradyOP@cardiff.ac.uk</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>29</day><month>4</month><year>2026</year></pub-date><volume>7</volume><elocation-id>e75678</elocation-id><history><date 
date-type="received"><day>08</day><month>04</month><year>2025</year></date><date date-type="rev-recd"><day>15</day><month>11</month><year>2025</year></date><date date-type="accepted"><day>30</day><month>12</month><year>2025</year></date></history><copyright-statement>&#x00A9; Ois&#x00ED;n Brady, Sean Johnson, Peter Giles, Caroline Alvares, Joanna Zabkiewicz, Carolina Fuentes. Originally published in JMIR Bioinformatics and Biotechnology (<ext-link ext-link-type="uri" xlink:href="https://bioinform.jmir.org">https://bioinform.jmir.org</ext-link>), 29.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">http://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Bioinformatics and Biotechnology, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://bioinform.jmir.org/">https://bioinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://bioinform.jmir.org/2026/1/e75678"/><abstract><sec><title>Background</title><p>Risk group stratification based on the prediction of survival of patients with acute myeloid leukemia (AML) is complex. Despite common risk group categorization guidelines, the overall prognosis remains poor. Machine learning techniques have been shown to provide more accurate risk group stratification than conventional approaches using trial data. 
However, many time-to-event (TTE) models do not use training sets constrained to specific time windows, instead using aggregations of trial data.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the performance of (1) random survival forest (RSF) and (2) Cox proportional hazard regression with elastic net regularization (CoxNet) for survival prediction of patients with AML within a censoring window trained with available data recorded at discrete time points during the United Kingdom National Cancer Research Institute Acute Myeloid Leukaemia 17 randomized controlled trial (AML17).</p></sec><sec sec-type="methods"><title>Methods</title><p>For each stage in the AML17 trial, separate models were trained for each exhaustive k-choice combination of available AML17 data subsets. Data combinations for each model were further constrained according to the respective trial stage to avoid data leakage. Preliminary Pearson correlation methods were used to remove features directly correlating with the TTE prediction target (time-to-death/5-y censoring point). Repeated k-fold stratified cross-validation was used on each dataset ablation to find candidate models. Permutation importance and elastic net regularization were used to monitor stability across validation folds and reduce the feature set of the highest performing stage RSF and Cox proportional hazard regression models, respectively. 
Finally, selected ablated models were re-evaluated using the nested, k-fold, stratified sampling cross-validation method with bootstrapping.</p></sec><sec sec-type="results"><title>Results</title><p>Concordance index ranked the best models for data constricted up to the end of induction (RSF=0.68, CoxNet=0.67), stages 1 (RSF=0.69, CoxNet=0.68), 2 (RSF=0.68, CoxNet=0.66), and 3 (RSF=0.69, CoxNet=0.63) of the trial.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study details the high prediction accuracy for time-to-survival-event predictions when training sets of CoxNet and RSF models are sequentially constricted to data measured up to the end of respective AML17 trial stages. The performance of these sequential TTE models is intended to justify their use as part of a wider digital twin system simulating multiple TTE outcomes for patients with AML.</p></sec></abstract><kwd-group><kwd>acute myeloid leukaemia</kwd><kwd>AML17</kwd><kwd>time-to-event</kwd><kwd>survival prediction</kwd><kwd>digital twin</kwd><kwd>random survival forest</kwd><kwd>cox proportional hazard regression</kwd><kwd>elastic net</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Cancer is an enigmatic issue with its pervasiveness seemingly as wide as the depth of its biological origins. As a group of diseases, cancer is highly variable in form, with roughly 200 types according to the National Cancer Institute [<xref ref-type="bibr" rid="ref1">1</xref>]. One such type of cancer, acute myeloid leukemia (AML), occurs from genetic abnormalities in precursory cells responsible for differentiating into mature platelets, white blood cells, and red blood cells. 
Genetically, AML is highly stratified, with multiple potential mutation points in the lineage of precursory cells responsible for mature blood cell types, such as within the earliest hematopoietic progenitor cell stage, or by mutation of immature intermediate blast cells [<xref ref-type="bibr" rid="ref2">2</xref>]. Ultimately, AML causes uncontrolled proliferation of immature or nonfunctional blood cells, leading to systemic immune dysfunction and organ complications. AML is often subcategorized into primary (or de novo) AML and secondary AML; secondary AML is further subdivided into therapy-related AML or acute myeloid leukemia derived from antecedent hematological disorders, such as myelodysplastic syndrome [<xref ref-type="bibr" rid="ref3">3</xref>]. Multiple genetic and environmental factors contribute to the cause and progression of AML, such as trisomy 21 (Down syndrome) [<xref ref-type="bibr" rid="ref4">4</xref>], Fanconi anemia [<xref ref-type="bibr" rid="ref5">5</xref>], Nucleophosmin 1 (NPM1) [<xref ref-type="bibr" rid="ref6">6</xref>], or FMS-like (Feline McDonough Sarcoma [<xref ref-type="bibr" rid="ref7">7</xref>]) tyrosine kinase 3 (FLT3) [<xref ref-type="bibr" rid="ref8">8</xref>] mutations. External factors associated with AML include prolonged exposure to benzene [<xref ref-type="bibr" rid="ref9">9</xref>], history of smoking [<xref ref-type="bibr" rid="ref10">10</xref>], or cancer therapy&#x2013;induced AML [<xref ref-type="bibr" rid="ref11">11</xref>]. This heterogeneous nature of AML contributes to the difficulty of diagnosing it precisely and treating it effectively [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>Deriving accurate survival predictions of patients with AML is an important foundational step in establishing risk groups upon which accurate treatment methods can be produced. 
As a result, there is a great emphasis on risk group stratification of patients with AML as a precursory step for optimized resource allocations and identification of biomarkers contributing to treatment response, as seen in the European LeukemiaNet (ELN) project [<xref ref-type="bibr" rid="ref13">13</xref>] or the World Health Organization (WHO) risk grading of AML [<xref ref-type="bibr" rid="ref14">14</xref>]. Within recent years, the availability of genomic data through next-generation sequencing (NGS) [<xref ref-type="bibr" rid="ref15">15</xref>] techniques combined with randomized controlled trial datasets, such as &#x201C;the United Kingdom National Cancer Research Institute Acute Myeloid Leukaemia 17 (AML17) trial&#x201D; [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], offers a wealth of information to make inferences on the disease and improved treatments. Indeed, 5-year overall survival rates for AML have improved, from 13% in 1970 to 55% (<italic>P</italic>&#x003C;.001) in 2010 for patients younger than the age of 60 years at the MD Anderson Cancer Center hospital and from 8% in 1970 to 17% for those older than 60 years [<xref ref-type="bibr" rid="ref19">19</xref>]. 
The Surveillance, Epidemiology, and End Results program based in the United States estimates the 5-year overall survival rate of patients with AML to be 31.9% based on survival data between 2014 and 2020, and estimates 20,800 new cases of the disease, constituting approximately 1% of new cancer cases in the United States in 2024, with 11,220 AML-related deaths [<xref ref-type="bibr" rid="ref20">20</xref>].</p><p>Despite improvements in outcome, it is apparent that traditional hierarchical approaches for risk grouping are not able to capture the full complexity involved in stratification of AML [<xref ref-type="bibr" rid="ref21">21</xref>], shown by the still dismal net survival rate of 13.6%, 5 years after diagnosis in England [<xref ref-type="bibr" rid="ref22">22</xref>]. With the sheer quantity of data now available from NGS and randomized controlled trial databases, alternative machine learning (ML) techniques used within oncology [<xref ref-type="bibr" rid="ref23">23</xref>] have been shown to capture complex features stratifying patients with AML [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. The United Kingdom National Cancer Research Institute (UK-NCRI) AML17 trial contains detailed records of clinical, longitudinal minimal residual disease (MRD) reports and genetic sequence mutation profiles of approximately 3142 patients with AML younger than 60 years between 2007 and 2014. Such a large, time-based dataset offers an ideal training set for ML models to capture complex risk stratification of the disease. The original AML17 protocol used standard statistical methods, such as the log-rank test for time-to-event (TTE) outcomes (survival for all randomizations), Mantel-Haenszel tests for dichotomous outcomes, and Wilcoxon rank-sum and <italic>t</italic> tests for resource usage data. 
It has more recently been shown that data from AML17 can be used for highly accurate ML risk group stratification based on survival prediction. Tazi et al [<xref ref-type="bibr" rid="ref26">26</xref>] applied several ML models trained on demographic, diagnostic, and genetic variables from several UK-NCRI AML trials, including AML17. By fitting models to predict overall survival via TTE of patient death up to censoring points, patients could be stratified by predicted survival risk measurements and separated into distinct groups based on delineating features. When compared with the ELN guideline, this new framework restratified 1 in 4 patients, with significantly improved prognostic accuracy. Another study using the subsequent AML18 trial [<xref ref-type="bibr" rid="ref27">27</xref>] used a random survival forest (RSF) model to update risk group stratification categories based on overall survival using age, sex, white blood cell count, gene mutations, and cytogenetic abnormalities of patients. Subsequently, numerous patients were restratified from risk groups in the standard 2022 ELN guideline, which could be used to retrospectively identify more optimal treatment paths [<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>Several ML models are specifically designed or adapted for TTE outcome prediction using right-censored data [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>] as seen in AML trial datasets.</p><p>One such ML model, an adaptation of the random forest algorithm [<xref ref-type="bibr" rid="ref31">31</xref>], RSF [<xref ref-type="bibr" rid="ref32">32</xref>], as previously mentioned, has been used for time-dependent survival predictions and does not require the proportional hazards assumption of statistical Cox proportional hazard regression (CPHR) models. 
CPHR is a statistical model commonly used across multiple scientific domains, including cancer research [<xref ref-type="bibr" rid="ref33">33</xref>] for TTE predictions. In the case of AML, where multiple interacting and time-dependent biomarkers affect survival outcome [<xref ref-type="bibr" rid="ref12">12</xref>], collinear features can negatively impact prediction accuracy when the independent features and proportional hazards assumptions of CPHR models are violated [<xref ref-type="bibr" rid="ref34">34</xref>]. In such cases, the standard CPHR model can be adapted using regularization techniques such as the &#x201C;Elastic Net&#x201D; method (also known as &#x201C;CoxNet&#x201D; [Cox proportional hazard regression with elastic net regularization]) [<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>Performance between the 2 models varies depending on the datasets and implementation. Several instances in literature show that RSF prediction is comparable with or even outperforms CPHR models [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. However, the converse is also referenced [<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>], suggesting that the application of these models is highly dependent on initial training datasets, preprocessing, model building methodologies, and the overall complexity of the predicted outcome. Pickett et al [<xref ref-type="bibr" rid="ref42">42</xref>] conclude that RSF performs best when leveraging its nonlinear nature with multiple, longitudinal data points, many of which have unknown levels of significance.</p><p>None of the ML-based studies reviewed involved static and longitudinal training sets that were constricted according to trial time frames and sequentially exposed to more data, instead using an aggregation of data. 
This study seeks to investigate the predictive performance of individual TTE models, beginning with 5-year survival status, which are sequentially exposed only to data available up to the conclusion of major time points in the AML17 trial. RSF and CPHR with Elastic Net have been chosen as TTE predictive models given their previous application in this context. A pipeline involving necessary data preprocessing, feature selection, and hyperparameter tuning used to build each will be detailed. Finally, after evaluation using the primary concordance index (c-index) metric alongside additional dynamic area under the receiver operating characteristic curve (also known as dynamic AUC) and Brier loss scores, the optimal models for survival prediction at select trial stages will be selected for future analysis. Future studies will analyze select model feature importance and significance with respect to state-of-the-art literature on AML risk stratification. The generalized pipeline will also serve as a template that can be adapted for additional TTE predictions other than death status. In the wider context of a &#x201C;digital twin&#x201D; [<xref ref-type="bibr" rid="ref43">43</xref>] system, this multiple time-constrained model approach could provide accurate simulations of a wide variety of patient outcomes, not solely focused on risk stratification but also patient quality of life (QoL) and optimized care for additional comorbidities during treatment.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>AML17 Trial Data</title><p>Data are sourced from the AML17 [<xref ref-type="bibr" rid="ref17">17</xref>] drug trial for patients younger than 60 years, which includes 3142 clinical records. 
Patient clinical records are combined with MRD (n=2587), NGS mutation profiles (n=3579), and a separate collection of <italic>NPM1</italic> [<xref ref-type="bibr" rid="ref6">6</xref>] and <italic>FLT3</italic> specific mutation profiles (n=3142) [<xref ref-type="bibr" rid="ref44">44</xref>]. A pseudonymization process converted patient trial IDs into dummy IDs before data access, ensuring compliance with participant privacy and data protection regulations. The pseudonymized dataset was stored on the Cardiff University Research Data Store [<xref ref-type="bibr" rid="ref45">45</xref>] with access restricted to authorized researchers. Clinical records contain measures at induction, including previous blood disorders, height, weight, the French-American-British 8-category AML classification [<xref ref-type="bibr" rid="ref46">46</xref>], WHO and Eastern Cooperative Oncology Group performance status [<xref ref-type="bibr" rid="ref47">47</xref>], cytogenetic, karyotype, ethnic background, and more. Metadata references to all used data are available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref><xref ref-type="supplementary-material" rid="app2"/><xref ref-type="supplementary-material" rid="app3"/>-<xref ref-type="supplementary-material" rid="app4">4</xref>.</p><p>After early diagnostic and comorbidity measurements during the induction stage, the subsequent 4 stages contain longitudinal records on periodic treatment, response, toxicity, and supportive care. The MRD subset includes quantitative polymerase chain reaction on peripheral blood and bone marrow samples, and multiparameter flow cytometry using the leukemia-associated immunophenotype and &#x201C;different from normal&#x201D; techniques. MRD measurements are longitudinal; the trial protocol involved readings at the end of each major trial stage investigated in this study. 
AML17 collected FLT3 and NPM1 mutation profiles with automated flow cytometry and manual bone marrow and peripheral blood cytology measurements at days 2&#x2010;3 from patient induction to the trial.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>Access to data from the UK-NCRI AML17 clinical trial was provided by the Cardiff University Centre for Trials Research [<xref ref-type="bibr" rid="ref48">48</xref>], which curates and governs the trial database.</p><p>All data were pseudonymized before being released to the research team. No direct patient identifiers were included in the dataset. All analyses were conducted on secure Cardiff University computing infrastructure. Ethical approval for the use of these data was granted by the Cardiff University School of Computer Science and Informatics Research Ethics Committee on February 28, 2025 (approval COMS/Ethics/2024/014). The research used secondary analysis of previously collected clinical trial data and did not involve direct contact with participants. Participants in the original AML17 trial provided written informed consent for their data to be used for research purposes. No compensation was provided to participants for this study, as it involved secondary analysis of previously collected trial data.</p></sec><sec id="s2-3"><title>Data Cleaning</title><sec id="s2-3-1"><title>Overview</title><p>Paper Case Report Forms recorded data throughout the AML17 trial. An initial screening process of longitudinal data found value errors in the exported dataset, predominantly within date fields. 
The following sections detail the conditions for sample exclusions based on erroneous record entries.</p></sec><sec id="s2-3-2"><title>Erroneous Record Removals</title><p>Most detectable erroneous values are date records, leading to the exclusion of 85 patient records with date values written outside of the trial time bounds between 2007 and 2014 (excluding annual follow-up dates that occur after the official trial end date, ie, July 31, 2014) or with nonsequential or otherwise nonchronological trial stage entry times, likely due to data entry errors or outstanding queries with sites at trial closure. The exclusion of nonsequential course start dates removed 19 patient records from the study. After all initial exclusions, 3057 patients with AML remained eligible for model training.</p></sec><sec id="s2-3-3"><title>Feature Removals Before Feature Selection</title><p>The pseudonymized dummy ID was dropped before model training to avoid spurious correlation from potential protocol batch induction bias. Other exclusions include nonstandardized clinician notes, which are highly varied text fields that are not immediately processable by RSF and CPHR models. Traditional preprocessing methodologies, such as dummy encoding, would introduce many Boolean representations of these features, most of which, given their variability, would occur only rarely, increasing data sparsity and potentially increasing model risk of overfitting [<xref ref-type="bibr" rid="ref49">49</xref>]. Consequently, categorical or continuous features from these columns cannot be immediately cataloged. Information held in these fields is of potential clinical and ML model importance. However, additional preprocessing techniques are needed to scrape the potentially multiple continuous and categorical features existing in a single record. 
Data capture would also need to handle the detection of differently written versions of the same category (eg, syntactic, spelling, or grammatical variations). Data mining using ML techniques, such as the usage of natural language processors [<xref ref-type="bibr" rid="ref50">50</xref>], may be explored for text classification of these fields in future studies. A breakdown of such excluded features is available in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p></sec></sec><sec id="s2-4"><title>Preprocessing</title><sec id="s2-4-1"><title>Overview</title><p>Clinical, NGS, FLT3, and MRD status records initially existed as separate pseudonymized comma-separated values (csv) files exported from the original AML17 trial database within the Cardiff University Centre for Trials Research. Each file was merged using the pseudonymized &#x201C;DummyID&#x201D; of each patient to produce unique patient records of all exported csv data. The training set was initialized using the Python &#x201C;Pandas&#x201D; [<xref ref-type="bibr" rid="ref51">51</xref>] library &#x201C;DataFrame&#x201D; object [<xref ref-type="bibr" rid="ref51">51</xref>], storing merged patient records. The data type was specified for each column, and individual records that violated the expected type were programmatically converted where possible; otherwise, the patient record was dropped. This ensured that each feature vector was readable to the applied CoxNet and RSF models. The following sections define the data preparation steps necessary for model training.</p></sec><sec id="s2-4-2"><title>Feature Set Data Representations</title><p>The AML17 dataset contains 2 general data types&#x2014;continuous and categorical. Continuous features were scaled using scikit-learn&#x2019;s &#x201C;StandardScaler&#x201D; class [<xref ref-type="bibr" rid="ref52">52</xref>], which computes a standard score of each sample by subtracting the mean of the feature vector and dividing by its SD. 
The standardized continuous Unix Epoch time is used to represent all instances of date fields. Unix Epoch time denotes the total nonleap seconds elapsed since 00:00:00 UTC on January 1, 1970 [<xref ref-type="bibr" rid="ref53">53</xref>]. While scaling is not a requirement for tree-based ensemble models, such as RSF, whose results are insensitive to the transformation [<xref ref-type="bibr" rid="ref54">54</xref>], applying it to continuous variables avoids scenarios where features that are orders of magnitude higher than others influence the objective function disproportionately within CPHR models. This also standardizes data representations for training sets of both models. Dummy encoding is used to convert categorical features into discrete Boolean features for each level, which is readable to the CPHR model.</p><p>Given that the AML17 dataset includes many categorical features, dummy encoding drastically increases the total number of features fed into both models, increasing the potential complexity of the model and potentially introducing overfitting. Feature reduction techniques are used to attenuate this. For RSF, the permutation importance [<xref ref-type="bibr" rid="ref55">55</xref>] technique is used to quantify feature importance. This involves randomly shuffling feature values a set number of times and measuring the effect on model performance each time through the c-index evaluation. Degradation of performance when changing these values indicates the RSF&#x2019;s relative reliance on a particular feature. The combination of 2 regularization methods, known as elastic net, was used to reduce the CPHR feature set. 
This combines <inline-formula><mml:math id="ieqn1"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x2113;</mml:mi><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn2"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x2113;</mml:mi><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> regularization methods [<xref ref-type="bibr" rid="ref35">35</xref>], simultaneously handling issues of collinearity within the dataset as well as feature reduction.</p></sec><sec id="s2-4-3"><title>TTE Predictor Variable</title><p>CoxNet, RSF, and similar models used for time-based prediction, such as support vector machines for survival [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>], use a target variable known as the &#x201C;time-to-event&#x201D; (TTE) variable, formalized by <xref ref-type="disp-formula" rid="E1">Equation 1</xref>:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mo movablelimits="true" form="prefix">min</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing="0.8em 0.2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:mi>t</mml:mi></mml:mtd><mml:mtd><mml:mtext>if&#x00A0;</mml:mtext><mml:mi>&#x03B4;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>c</mml:mi></mml:mtd><mml:mtd><mml:mtext>if&#x00A0;</mml:mtext><mml:mi>&#x03B4;</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" 
symmetric="true"/></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn3"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B4;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is a Boolean value representing event occurrence (in this instance, patient death), <italic>t</italic> being the time range from patient trial induction to the event, and <italic>c</italic> being the time range from patient trial induction to the censoring threshold.</p><p><xref ref-type="disp-formula" rid="E1">Equation 1</xref> is modified to shift the censoring window of patient records to 5 years from induction, formalized as <xref ref-type="disp-formula" rid="E2">Equation 2</xref>:</p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:mi>t</mml:mi></mml:mtd><mml:mtd><mml:mtext>if&#x00A0;</mml:mtext><mml:mi>&#x03B4;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2227;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>c</mml:mi></mml:mtd><mml:mtd><mml:mtext>otherwise</mml:mtext></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"/></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>This accounts for instances of patients who have died on either side of a 5-year censoring window, a threshold seen in follow-up analysis of AML17 [<xref ref-type="bibr" rid="ref58">58</xref>]. 
A 5-year cutoff point provided a more even distribution of noncensored patients than lower thresholds for TTE prediction models. Clinical variables define the TTE variable as a tuple: <inline-formula><mml:math id="ieqn4"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>&#x03B4;</mml:mi><mml:mo>,</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, with inclusion of <inline-formula><mml:math id="ieqn5"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>t</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> or <inline-formula><mml:math id="ieqn6"><mml:mi>c</mml:mi></mml:math></inline-formula> depending on the censoring status of the event as described in <xref ref-type="disp-formula" rid="E2">Equation 2</xref>. Both the ablation and final model tuning methods stratify each fold to ensure that TTE indicators within the fold are distributed approximately as in the entire cohort to avoid nonrepresentative sampling issues.</p></sec><sec id="s2-4-4"><title>Trial-Stage Sensitive Ablation Study</title><p>To determine which broad groups of initial training datasets were most important for each model, we conducted an ablation study that selected possible combinations of NGS, FLT3 and NPM1, clinical, and MRD data subsets. C-index evaluations of tuned and validated models ranked the relative importance of each combination. Permutation importance [<xref ref-type="bibr" rid="ref55">55</xref>] and nonzero coefficient values determined individual feature importance for RSF and CoxNet models, respectively. 
Multiple RSF and CoxNet models predicted the TTE target after being trained on subsets of patients surviving up to the end of each AML17 course stage, that is, induction, C1, C2, and C3. We trained models at each stage on varying degrees of information based on precomputed data subset combinations. The data pulled from clinical and MRD subsets were constrained such that each model only had access to features recorded up to the end of their respective course stage to avoid potential data leakage and provide predictions using data measurements only available up to specific trial time points. We define the set of training data combinations as all possible K-choice, non&#x2013;order-specific, nonrepeating items at each trial stage, formalized as:</p><disp-formula id="E3"><label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:munderover><mml:mfrac><mml:mrow><mml:mn>3</mml:mn><mml:mo>!</mml:mo></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>!</mml:mo><mml:mi>k</mml:mi><mml:mo>!</mml:mo></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>35</mml:mn></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn7"><mml:mi>c</mml:mi></mml:math></inline-formula> represents the current trial stage (induction, C1, &#x2026;, and C3), and <inline-formula><mml:math id="ieqn8"><mml:mi>k</mml:mi></mml:math></inline-formula> the number of selected data 
sources.</p><p>The protocol recorded FLT3, NPM1, and a broader collection of NGS mutation panel measurements for patients 2&#x2010;3 days after their randomized induction to the trial; therefore, these data did not necessitate further constraint, as all time points are set after the recording of these data.</p><p>Direct Pearson linear correlations to death status determined the exclusion of features from all data combinations. Likewise, we excluded categorical features with options that implied patient death status. Excluded features are detailed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref><xref ref-type="supplementary-material" rid="app2"/>-<xref ref-type="supplementary-material" rid="app3">3</xref>. In total, with 4 time point stages (induction, C1, C2, and C3), the total k-choice combinations across each stage equaled 35 training sets. These trained a combined total of 70 CoxNet and RSF models.</p></sec></sec><sec id="s2-5"><title>Model Building</title><sec id="s2-5-1"><title>Overview</title><p>The model building process is divided into 4 major phases, namely, (1) preprocessing; (2) ablation study; (3) final evaluation, based on the highest performing candidate ablations for RSF and CoxNet models at each trial stage; and (4) a baseline risk model comparison, which compares RSF and CoxNet c-indices against a standard Cox model used within the trial protocol to stratify patients post stage 1.</p></sec><sec id="s2-5-2"><title>Preprocessing Phase</title><p>The following defines the overall steps involved in the preprocessing pipeline on data that are made consistent between both of the following phases. Cleaning methods detailed in earlier sections have been excluded for simplicity (refer to Data Cleaning section). 
The preprocessing pipeline is called and fit only to data available within the scope of the fold used within cross-validation (CV) of the ablation study and final model evaluation phases.</p><list list-type="order"><list-item><p>Drop all features with total missing entries &#x003E;95%.</p></list-item><list-item><p>Create missing indicator features for each feature with at least 1 occurrence of a missing value.</p></list-item><list-item><p>Flatten the NGS data subset of gene mutation entries, dummy encode, and merge with the rest of the combined dataset (clinical, MRD, FLT3, and NPM1 subsets). Encoding is based only on available samples within the fold to avoid potential leakage of nonfold sample gene mutation entries.</p></list-item><list-item><p>Dummy encode karyotype features, labeling rare entries (n&#x003C;5) to a &#x201C;rare_class&#x201D; category to avoid dimensionality explosions.</p></list-item><list-item><p>Dummy encode all other standard categorical features.</p></list-item><list-item><p>Preserve the ordinality of the identified ordinal records by keeping them as individual features, using predefined integer mappings from the protocol. Missing values are labeled with the sentinel value &#x2212;999, which lies consistently outside of all ordinal ranges.</p></list-item><list-item><p>Scale identified numerical features by removing the mean and scaling to unit variance (using scikit-learn&#x2019;s StandardScaler).</p></list-item></list></sec><sec id="s2-5-3"><title>Ablation Study Phase</title><p>The goal of this phase is to act as a sensitivity analysis of the major data sources available from the AML17 trial. By using every possible combination of these data sources (refer to <xref ref-type="disp-formula" rid="E3">Equation 3</xref>), the most influential data can be determined using the average c-index performance. 
This, in turn, acts as a feature reduction step and ensures selected ablations for downstream analysis are using features of importance relative to the specific model and respective trial stage.</p><p>The following steps define the preliminary ablation study phase. For each ablation at each trial stage, the cohort applies repeated (n=3), stratified, 5-fold CV using a consistent random state seed for reproduction.</p><list list-type="order"><list-item><p>Feature preprocessing phase pipeline fit to the training set and transformed on the train and validation set within the fold scope.</p></list-item><list-item><p>Train a baseline RSF model on the fold&#x2019;s training set.</p></list-item><list-item><p>Apply permutation importance on the baseline model (using 150 ensemble estimators and a consistent randomized state seed), recording feature stability per repeated fold.</p></list-item><list-item><p>Measure the c-index and inverse probability of censoring weights (IPCW) c-index of the baseline ablation RSF model.</p></list-item><list-item><p>For the same fold repetition, train a baseline CoxNet model and record feature stability using model coefficient values.</p></list-item></list></sec><sec id="s2-5-4"><title>Final Evaluation Phase</title><p>The following steps define the final model-building and evaluation phase:</p><list list-type="order"><list-item><p>At each stage, select a CoxNet and RSF ablation model with the highest recorded average c-index across all repeated CV folds from the ablation study.</p></list-item><list-item><p>For each selected model, using their respective ablated dataset and trial cohort, use nested k-fold, stratified CV, with sample shuffling and the same randomization seed used consistently across all experiments.</p><list list-type="alpha-lower"><list-item><p>The outer loop is reserved for unbiased performance estimation across 10 folds of the training set.</p></list-item><list-item><p>The inner loop is reserved for hyperparameter tuning 
across 3 folds. Validation samples are never included in model training in their respective loop, ensuring strict separation between training and validation data to avoid bias or overfitting. Grid search spaces for hyperparameter tuning of the models are:</p><list list-type="roman-lower"><list-item><p>RSF: &#x2018;n_estimators&#x2019; = [500, 750, 1000, 1250, 1500]</p></list-item><list-item><p>RSF: &#x2018;max_features&#x2019; = [&#x2018;sqrt&#x2019;, &#x2018;log2&#x2019;, 0.33, 0.5]</p></list-item><list-item><p>RSF: &#x2018;max_features&#x2019; = [3, 5, 10, 15]</p></list-item><list-item><p>CoxNet: &#x2018;l1_ratio&#x2019; = [0.01, 0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 1.0]</p></list-item></list></list-item><list-item><p>Record each fold&#x2019;s bootstrapped performance estimates across 250 bootstrapped samples (250 samples were chosen as a compromise between sample variance for CI precision and processing time constraints with the already expensive nested CV operations). Metrics include c-index, IPCW c-index, dynamic AUC, and dynamic Brier loss.</p></list-item></list></list-item></list><p>As fold event times vary across bootstraps, dynamic AUC and Brier loss metrics for fold bootstrap samples are interpolated to the nearest predefined &#x201C;universal&#x201D; time points for consistency between selected samples across the validation process.</p></sec><sec id="s2-5-5"><title>Baseline Risk Model Comparison Phase</title><p>For comparison with the Cox linear regression model used in the AML17 trial protocol for risk assessment of the cohort ending their first stage of treatment, an additional <italic>non-nested</italic> grid search CV assessment on a 90%:10% train-test split with 1000 bootstrapped samples was conducted. We recorded dynamic AUC, Brier loss, IPCW c-index, and c-index scores of both models at this stage. The purpose of this analysis is to suggest improved c-index performance of the respective CoxNet and RSF stage models against the protocol&#x2019;s model. 
The rationale for using larger sample sizes for this assessment was to provide a more realistic indication of the model&#x2019;s performance relative to the protocol&#x2019;s model when trained with a sample size closer to real-world cases. As the test set is smaller and overlaps with hyperparameter tuning, this assessment is inherently optimistic. Therefore, the nested CV results with bootstrapped CIs illustrated in the Final Evaluation Phase section remain the primary, more sensitive, and pessimistic evaluation of generalized model performance.</p></sec></sec><sec id="s2-6"><title>Model Evaluations</title><sec id="s2-6-1"><title>Overview</title><p>The c-index evaluated model performance from a held-out test set not used in previous training. The standard (Harrell) c-index used for survival model evaluation is dependent on the distribution of the censored events. Therefore, to avoid potential bias, we recorded an alternative adapted form of c-index based on the IPCW. However, likely due to the inner handling of censoring by the models, differences between IPCW c-index and standard c-index were absent or, at most, minuscule, so standard c-index remained the primary performance metric for model assessments. Both standard and IPCW adapted c-index results for all final models have been included for transparency. 
Secondary performance metrics involved:</p><list list-type="order"><list-item><p>Dynamic AUC [<xref ref-type="bibr" rid="ref59">59</xref>] assessed predictive performances across patients selected at discrete time points from the end of the model&#x2019;s respective trial stage to the 5-year censoring point.</p></list-item><list-item><p>A time-dependent Brier loss score, measuring the mean squared difference between predicted and real TTEs at iterative time points, indicated the models&#x2019; calibration.</p></list-item></list></sec><sec id="s2-6-2"><title>Cumulative-Dynamic AUC</title><p>This performance metric assesses the model&#x2019;s ability to discriminate between patients who experience an event before a specific time point (<inline-formula><mml:math id="ieqn9"><mml:mi>t</mml:mi></mml:math></inline-formula>) and those who experience an event after [<xref ref-type="bibr" rid="ref59">59</xref>]. AUC ranges from 0 to 1 inclusively, with higher values indicating better discrimination between patient events before and after <inline-formula><mml:math id="ieqn10"><mml:mi>t</mml:mi></mml:math></inline-formula>.</p></sec><sec id="s2-6-3"><title>Dynamic Brier Loss Score</title><p>This performance metric assesses how well a model is calibrated, evaluating how closely model predictions match the real labeled TTE variable of a patient at time point <inline-formula><mml:math id="ieqn11"><mml:mi>t</mml:mi></mml:math></inline-formula>, typically referred to as the &#x201C;ground truth.&#x201D; This is done by evaluating the mean squared difference between predicted event times and the ground truth TTE at <inline-formula><mml:math id="ieqn12"><mml:mi>t</mml:mi></mml:math></inline-formula>. 
Brier loss at <inline-formula><mml:math id="ieqn13"><mml:mi>t</mml:mi></mml:math></inline-formula> ranges between 0, for models with perfect accuracy, and 1, for perfect inaccuracy. The integrated Brier loss score can also be measured to evaluate overall model calibration throughout the 5-year period since patient induction.</p></sec></sec><sec id="s2-7"><title>Pipeline Summary</title><p>A diagrammatic summary of the model training pipeline, briefly describing the 4-phase process detailed above, is presented in <xref ref-type="fig" rid="figure1">Figure 1</xref>. Descriptions of CoxNet and RSF are detailed in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref> and <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>. Code for relevant experiments is available in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Summary of the 4-phase pipeline process for survival time-to-event prediction model building at the AML17 trial stages. 
AML17: United Kingdom National Cancer Research Institute Acute Myeloid Leukaemia 17 randomized controlled trial; AUC: area under the receiver operating characteristic curve; c-index: concordance index; CoxNet: Cox proportional hazard regression with elastic net regularization; CV: cross-validation; FLT3: Feline McDonough sarcoma-Like Tyrosine kinase 3; MRD: minimal residual disease; NGS: next-generation sequencing; NPM1: Nucleophosmin 1; RSF: random survival forest.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v7i1e75678_fig01.png"/></fig></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>Results are presented as follows. First, event (patient death within censoring window) status and timing distributions are measured, describing the spread of target events across cohorts and time for RSF and CoxNet models. Second, a Kaplan-Meier survival curve is shown for each cohort, including the full AML17 cohort, after cleaning for erroneous TTE indicators described in the Data Cleaning section. These curves provide a visual summary of the baseline survival patterns between cohorts, which the stage-specific survival models are tasked with capturing. Third, feature missingness correlations are visualized using heatmaps for data sources across AML17 cohorts, highlighting potential missingness mechanisms and motivating the usage of missing indicator variables for models. Fourth, <xref ref-type="table" rid="table1">Table 1</xref> reports feature set sizes before and after reduction steps for RSF and CoxNet models, showing how reduction methods remove redundant features, focusing on the most informative and stable predictors quantitatively selected during phase 3 of the methodology. 
Fifth, c-index, dynamic AUC, and dynamic Brier quantify ranking accuracy, time-dependent discrimination, and overall predictive accuracy for each stage, providing a suite of metrics for cross-reference with similar work and reproducibility. Finally, feature importance is visualized using Venn diagrams of overlapping top-ranked features between stage-specific RSF and CoxNet models, and vertical bar charts illustrate the relative importance of the top 30 highest-ranking predictors in each model.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Feature set reductions for each RSF<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> and CoxNet<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> model at their corresponding trial stage.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Trial stage</td><td align="left" valign="bottom">Feature reduction (n before, n after, n after encoding)</td></tr></thead><tbody><tr><td align="left" valign="top">Post induction</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 78, 70, 392</p></list-item><list-item><p>CoxNet: 56, 45, 205</p></list-item></list></td></tr><tr><td align="left" valign="top">Post-C1</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 156, 126, 479</p></list-item><list-item><p>CoxNet: 115, 85, 225</p></list-item></list></td></tr><tr><td align="left" valign="top">Post-C2</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 228, 173, 596</p></list-item><list-item><p>CoxNet: 228, 168, 463</p></list-item></list></td></tr><tr><td align="left" valign="top">Post-C3</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 290, 142, 478</p></list-item><list-item><p>CoxNet: 211, 174, 452</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>RSF: random survival forest.</p></fn><fn 
id="table1fn2"><p><sup>b</sup>CoxNet: Cox proportional hazard regression with elastic net regularization.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Target Class Distributions</title><p>TTE (event=death status) is the predictor variable of both models. Class imbalances influence the overall performances of a model [<xref ref-type="bibr" rid="ref60">60</xref>]. There were no major class distribution imbalances in any of the 5 trial stages used for model training. No training sets had minority class percentages &#x003C;41% and the average minority class percentage was approximately 44% (induction=42.8%, C1=46.9%, C2=48.9%, C3=41.3%, C4=42.9%). In the literature, there is no definitive threshold at which imbalances are considered severe enough to affect the performance of ML models. A rule of thumb is that an imbalance is considered &#x201C;moderate&#x201D; when minority classes are 1%&#x2010;20% of the dataset [<xref ref-type="bibr" rid="ref61">61</xref>]. Since this was not the case, the use of over- or undersampling techniques or synthetic data generation techniques such as the Synthetic Minority Oversampling Technique [<xref ref-type="bibr" rid="ref62">62</xref>] was deemed unnecessary.</p></sec><sec id="s3-3"><title>Event Time Distributions</title><p>Analysis of event time distributions in <xref ref-type="fig" rid="figure2">Figure 2</xref> shows a disproportionate number of right-censored events for patient sets used for each of the 5 selected trial stages. This initially justified the usage of a c-index based on IPCW, which is specifically adapted for this situation [<xref ref-type="bibr" rid="ref63">63</xref>] rather than the standard c-index, which is dependent on TTE distributions. However, it was found that c-index and adapted IPCW c-index readings of CoxNet and RSF models were identical for stage models (eg, postinduction stage mean c-index=0.6760, mean IPCW c-index=0.6760). 
Full performance metric results across all stage ablation models are included in <xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Distribution of time-to-event target variables in each of the four trial stages with censoring 5 years from patient induction. The distribution is irregular for all stages with the final histogram bin of induction, C1, C2, and C3 stages (C4 was excluded from further experiments because of small sample size concerns [n=177]).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v7i1e75678_fig02.png"/></fig></sec><sec id="s3-4"><title>Cohort Kaplan-Meier Survival Curves</title><p>Kaplan-Meier survival curves for each patient cohort for modeling show distinct survival patterns between trial stage cohorts, excluding the full cohort upon trial entry and the postinduction stage, as it began within 1&#x2010;3 days according to the AML17 protocol. This gives an indication of the distinguishable baseline survival patterns that vary between AML17 stage cohorts, which each model is tasked to predict. For visual clarity, the posttreatment stage 4 cohort was excluded from <xref ref-type="fig" rid="figure3">Figure 3</xref>, as it overlaps across many cohorts. Note that the posttreatment stage 4 was excluded from further modeling due to small sample size concerns (n=177).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Kaplan-Meier survival curves for each cohort used by random survival forest and Cox proportional hazard regression with elastic net regularization models after data cleaning. The full AML17 cohort (post data cleaning) is also shown (blue). Shaded regions represent upper and lower CIs, solid lines (made dashed orange for postinduction for readability) represent average survival probability at the time point. 
Vertical dashed lines show trial stage cohort earliest start times since induction, with exact values shown in the legend.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v7i1e75678_fig03.png"/></fig></sec><sec id="s3-5"><title>Data Missingness</title><p>Data missingness, nonexistent trial records, per trial stage cohort, and full AML17 cohort (post&#x2013;data cleaning record exclusions) have been visualized using heatmaps for missingness correlation between features and frequency missingness plots for overall record missingness. Given the volume of total features, each figure has been further divided by data subset (eg, Clinical or NGS). The following matrix shows the missingness correlation between all clinical variables across the whole AML17 cohort used in this study. This provides justification for the usage of the missingness indicator features supplied as additional predictors for each model, which can be used to assess the informativeness of missingness and additional follow-up analysis. All additional figures have been included in <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>.</p><p>Across the entire AML17 cohort, the clinical subset has strong missingness correlations, suggesting a possible missing not at random mechanism for clinical data recorded at specific trial stages, indicated by the dark blue blocks of longitudinal features recorded at each trial stage in <xref ref-type="fig" rid="figure4">Figure 4</xref>. This is expected for any patients failing to proceed to a stage due to death or exclusion criteria.</p><p>When restricted to individual stage cohorts, the missingness correlation of clinical data shows a less pronounced block of strong positive relations, as this stage excludes patients who died beforehand (and thus have missing records) or who were otherwise no longer eligible based on trial protocols. 
Correlations that remain are often clinically explainable; for instance, records for comorbidity timings, such as those indicating nausea durations (&#x201C;NauseastartC1&#x201D; and &#x201C;NauseastopC1&#x201D;), are obviously both missing if the patient did not have such a symptom. However, to avoid possible assumptions, such variables, even if frequently missing across entire cohorts, were investigated for informativeness to model survival predictions by including an additional missing indicator variable for each; informativeness was then identified through stable feature importance analysis of final models illustrated in the Feature Importances section.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Missingness correlation per feature across the full AML17 cohort for the clinical data subset.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v7i1e75678_fig04.png"/></fig></sec><sec id="s3-6"><title>Final Optimized Trial Stage Model Measurements</title><p>The highest performing models selected via c-index and feature stability selection defined in phase 3 (refer to Final Evaluation Phase in Methods section) were retrained for hyperparameter tuning under nested CV and re-evaluated according to c-index. Key statistics, such as training sample size, the ablation components, and feature set pre- and postreduction, are measured. Feature reduction was model-specific. RSF pruned features according to their average relative permutation importance. Features with nonzero coefficients determined by elastic net regularization remained in the CoxNet model. <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref> show feature reductions, best performing data source ablations, training sample sizes, and average c-index with 95% CIs of each stage-specific model. 
Raw JSON metric files can be found in <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>.</p><p>RSF shows higher c-index readings across all stages. However, CoxNet had narrower CIs at the postinduction and post-C1 stages (<xref ref-type="table" rid="table2">Table 2</xref>), suggesting it may be more generalizable at these earlier stages, which requires further analysis using an external dataset. Given that the bootstrapping across each nested fold was set to 250, which was used as a compromise between precision and computational runtimes, it may be the case that a higher number of sample iterations could yield more precise and potentially smaller intervals in both models. All recorded evaluation metrics for final models trained with nested CV are available in <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>The highest-performing ablation components, training set sample sizes, and respective c-indices for each model after average performance from the ablation analysis phase. 
Subscripts below longitudinal ablation components indicate that the cohort data subset is additionally constrained to only data recorded prior to the respective stage.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Trial stage</td><td align="left" valign="bottom">Ablation components</td><td align="left" valign="bottom">Training sample size</td><td align="left" valign="bottom">Model average c-index (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">Postinduction</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>: Clinical<sub>i</sub>, MRD<sub>i</sub><sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup>, NGS<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup>, <italic>FLT3<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></italic>, and <italic>NPM1<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></italic></p></list-item><list-item><p>CoxNet<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup>: Clinical<sub>i</sub>, <italic>FLT3</italic>, and <italic>NPM1</italic></p></list-item></list></td><td align="left" valign="top">2989</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 0.68 (0.62&#x2010;0.74)</p></list-item><list-item><p>CoxNet: 0.67 (0.62&#x2010;0.72)</p></list-item></list></td></tr><tr><td align="left" valign="top">Post-C1</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: Clinical<sub>C1</sub>, MRD<sub>C1</sub>, <italic>FLT3</italic>, and <italic>NPM1</italic></p></list-item><list-item><p>CoxNet: Clinical<sub>C1</sub>, <italic>FLT3</italic>, and <italic>NPM1</italic></p></list-item></list></td><td align="left" valign="top">2609</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 0.69 (0.63&#x2010;0.76)</p></list-item><list-item><p>CoxNet: 0.68 
(0.62&#x2010;0.74)</p></list-item></list></td></tr><tr><td align="left" valign="top">Post-C2</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: Clinical<sub>C2</sub>, MRD<sub>C2</sub>, <italic>FLT3</italic>, and <italic>NPM1</italic></p></list-item><list-item><p>CoxNet: Clinical<sub>C2</sub>, MRD<sub>C2</sub>, <italic>FLT3</italic>, and <italic>NPM1</italic></p></list-item></list></td><td align="left" valign="top">1917</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 0.68 (0.61&#x2010;0.74)</p></list-item><list-item><p>CoxNet: 0.66 (0.58&#x2010;0.74)</p></list-item></list></td></tr><tr><td align="left" valign="top">Post-C3</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: Clinical<sub>C3</sub>, MRD<sub>C3</sub>, <italic>FLT3</italic>, and <italic>NPM1</italic></p></list-item><list-item><p>CoxNet: Clinical<sub>C3</sub>, <italic>FLT3</italic>, and <italic>NPM1</italic></p></list-item></list></td><td align="left" valign="top">761</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>RSF: 0.69 (0.56&#x2010;0.81)</p></list-item><list-item><p>CoxNet: 0.63 (0.49&#x2010;0.77)</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>RSF: random survival forest.</p></fn><fn id="table2fn2"><p><sup>b</sup>MRD:  minimal residual disease.</p></fn><fn id="table2fn3"><p><sup>c</sup>NGS: next-generation sequencing.</p></fn><fn id="table2fn4"><p><sup>d</sup>FLT3: Feline McDonough sarcoma-Like Tyrosine kinase 3.</p></fn><fn id="table2fn5"><p><sup>e</sup>NPM1: Nucleophosmin 1.</p></fn><fn id="table2fn6"><p><sup>f</sup>CoxNet: Cox proportional hazard regression with elastic net regularization.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-7"><title>Evaluation of the AML17 Protocol Model</title><p>Using the full posttrial stage C1 cohort, the AML17 trial protocol risk assessment Cox linear regression model was compared with the RSF 
and CoxNet models at the same stage using a 0.9:0.1 train-test split and 1000 bootstrapped samples by c-index.</p><p>These results show the potential performance gain between trial risk-based models, such as the one used in AML17 for patient treatment stratification, and the RSF and CoxNet models used within this study. The reader is reminded that this specific protocol comparative analysis was exploratory and excluded a nested CV process, unlike those used to produce results highlighted in <xref ref-type="table" rid="table2">Table 2</xref>. The larger training sample set used for these specific models, while suggesting nonpessimistic performance with more realistic training set sizes, is at risk of overfitting against an external dataset and should be interpreted with caution. It should also be noted that CIs between all models overlap. Future work will include obtaining additional trial data from subsequent AML18 and AML19 trials as an external validation source.</p></sec><sec id="s3-8"><title>Dynamic AUC</title><p>Dynamic AUC was computed for each stage-specific model (RSF and CoxNet) from the beginning of the stage to the 5-year censoring point. Dynamic AUC quantifies a model&#x2019;s discriminative ability at a given point in time <italic>t</italic>, representing how well the model distinguishes between patients who experience an event before <italic>t</italic> and those who remain event-free beyond <italic>t</italic>. AUC scores are bounded between values 0 and 1 inclusively, with higher values indicating better discrimination. A value of 0.5 corresponds to random chance, where a model cannot distinguish between patients. Values below 0.5 indicate a model makes predictions in the opposite ordering, effectively inverting the risk estimate. 
Note that an AUC of 0 indicates that the model perfectly ranks patients in the reverse order of risk; in such cases, inverting predicted risk scores would yield an AUC of 1, corresponding to perfect discrimination.</p><p>Following the ablation study, candidate models were evaluated by their mean c-index across all repeated, stratified folds of the CV process. At each trial stage, the highest performing RSF and CoxNet models were selected for a final, more robust nested CV with additional bootstrapping per fold for performance estimates. Because observed event time points differ across bootstrapping samples, a universal set of equally intervaled time points was first predefined. Each fold bootstrap sample AUC measurement was then interpolated to its nearest universal time point.</p><p>AUC shows an initial period of instability immediately after the stage&#x2019;s baseline. This behavior was expected&#x2014;early time points will have a smaller cumulative observed event count than the remaining time window of the trial stage. Additionally, risk distributions shift rapidly in these early time windows as patients transition into new treatments based on the existing protocol risk assessment and randomized allocation. As time progresses, all models tend to a more stabilized AUC score, shown by the plateau of the mean AUC and narrowing of CIs.</p><p>Later stages (particularly poststage 3) exhibit visibly wider CIs and flatter trajectories. This reflects the considerably lower sample sizes of this stage (&#x201C;Post-C3&#x201D; n=761, <xref ref-type="table" rid="table2">Table 2</xref>) and higher censoring proportions (<xref ref-type="fig" rid="figure2">Figure 2</xref>) at this deeper trial stage. Consequently, this limits statistical power and increases uncertainty in time-dependent discrimination estimates. 
Therefore, dynamic AUC curves at poststage 3 (<xref ref-type="fig" rid="figure5">Figures 5G and 5H</xref>) should be interpreted with greater caution.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Dynamic AUC performance of each stage-specific model plotting average dynamic AUC performance as a dark blue line and 95% CI as the light blue shaded region. AUC: area under the receiver operating characteristic curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v7i1e75678_fig05.png"/></fig></sec><sec id="s3-9"><title>Dynamic Brier Loss</title><p>Dynamic Brier loss was computed for each stage-specific RSF and CoxNet model selected via the ablation study to evaluate prediction accuracy over time. At each time point <italic>t</italic>, the Brier score measures the mean square difference between the predicted survival probability at <italic>t</italic> and the observed event status. Score values are bounded between 0 and 1 inclusively, where 0 represents a perfectly accurate model and 1 a completely inaccurate model.</p><p>To enable consistent comparison across CV fold bootstrapped samples, dynamic Brier loss curves were evaluated over the same set of predefined, equally intervaled time points used for the dynamic AUC assessment.</p><p>Across all stages, dynamic Brier loss scores begin with very low loss values and narrow CIs. This behavior is expected&#x2014;a smaller proportion of events has been observed at these baseline windows, most patients remain event-free, resulting in highly accurate short-term predictions at low variance. As time progresses throughout each stage model, the Brier loss score increases and the CIs widen. 
This reflects both the increasing difficulty of long-term survival prediction and the gradual decrease of the at-risk population contributing to the estimate (as the number of patients experiencing an event before <italic>t</italic> increases, decreasing the remaining subset of at-risk patients available from <italic>t</italic>).</p><p>Similarly to the dynamic AUC results, later stages (particularly poststage 3) show visibly wider CIs. This pattern arises from the reduced sample size available within the cohort (761 patients; <xref ref-type="table" rid="table2">Table 2</xref>) and higher censoring proportions (<xref ref-type="fig" rid="figure2">Figure 2</xref>), which limit the precision of time-dependent accuracy estimates and increase variance at later <italic>t</italic> evaluation points. Therefore, dynamic Brier loss curves at poststage 3 (<xref ref-type="fig" rid="figure6">Figures 6G and 6H</xref>) should be interpreted with caution.</p><p>Full resolution dynamic Brier loss and AUC plots are available in <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Dynamic Brier loss of each stage-specific model plotting average Brier loss as a dark blue line and 95% CI as the light blue shaded region.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v7i1e75678_fig06.png"/></fig></sec><sec id="s3-10"><title>Feature Importances</title><p>The following figures show the feature importance of the best-performing tuned models at each select stage of the trial. Importance was ranked according to relative importance scores calculated from permutation importance for RSF and by coefficient values from CoxNet. For postinduction, post-C1, post-C2, and post-C3 models, RSF&#x2019;s feature selection method included more features in the final model. 
Given the small difference in c-index and dynamic AUC performance between RSF and CoxNet during trial stage predictions, it is unlikely that the increased feature pool holds strong predictive candidates, suggesting that the increased RSF feature dimensionality from the feature selection process could pose a risk of overfitting and a lack of generalizability.</p><p>Feature importance for each trial stage model has been ranked to highlight the strongest predictors. For figure readability below, only the top 30 highest-ranked features are included; the total number of features for all stage models exceeds this number, as illustrated in <xref ref-type="table" rid="table1">Table 1</xref>. While the principal goal of this study concerns the performance of time-sequential TTE prediction models through the trial, future work investigating and explaining generalizations of these models is necessary. Therefore, as a precursory step for future work, we have highlighted the most important features of the best models for each stage in <xref ref-type="fig" rid="figure7">Figure 7</xref>. Features that consistently mapped over all or the majority of trial stage models (often excluding the early postinduction stage, as longitudinal records did not exist at this time point) include known AML prognosticators&#x2014;age, white blood cell count, marrow blast values, cytogenetic risk groups (such as the core-binding factor t(8;21)/inv(16)), <italic>NPM1</italic>, and <italic>FLT3</italic> markers [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>]. There are also several consistently important predictors unreferenced as biological risk factors. 
Missing indicators (eg, blast marrow, <italic>FLT3</italic> mutation type, and internal tandem duplication length value entries) suggest predictive informativeness of missing measurements, which may reflect selection bias, measurement practices from clinical sites, or the severity of a patient&#x2019;s condition. Other predictors include administrative timing fields, such as marrow blast test dates or chemotherapy timings, which, while not strictly biological risk factors, suggest the importance of treatment timing or intensity. It is possible that more precise dosage levels and treatment timings could be recommended from such a system on a per-patient basis. Predictors measured as important but not referenced within the literature most likely reflect measurement practice bias or even data leakage, not necessarily causal disease biology, and warrant further analysis.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Top 30 ranked relative feature importance of trial stage specific models.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v7i1e75678_fig07.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The primary performance metric, c-index, of the best models for each trial stage shows that RSF outperforms CoxNet throughout all major stages of the trial, with differences in performance becoming more apparent throughout successive trial stages. This suggests the optimal simulation of TTE outcome prediction would incorporate RSF or additional nonlinear model types that can capture nonlinear, complex relationships.</p><p>Brier loss of stage models appears similar in mean square error trends for all stage models except at stage 3, likely due to having the smallest sample size for training. 
In the instance of stage 3, the difference in accuracy is larger between RSF and its lower accuracy counterpart, CoxNet. However, it must be noted that the sample size for training of this model is the lowest, as final trial arm branches become increasingly fractional, using 761 samples. Therefore, overfitting is increasingly more likely at this stage, and conclusions made for the outperforming model should be made with caution.</p><p>Cumulative AUC shows that both models tend to perform similarly throughout time, being most unstable at the initial stages of the model&#x2019;s available prediction window before censoring (which spans from the end of the respective stage to the 5-year censoring point since patient induction). Notable inflection points occur at similar early time frame windows, which eventually plateau into a stable time-dependent prediction. The degree of change during the initial window of each stage model before stabilization appears more drastic for both models, showing instances of under- and overperformance relative to their mean AUC score. This indicates that the initial periods of all trial stage models are the most sensitive zones in terms of predictive performance. While it cannot necessarily be concluded that the large performance differences before plateaus are solely a result of inadequate sample sizes, there are naturally fewer total event instances in the earliest sample periods with respect to the aggregate of events throughout the entire 5-year window. With low event counts at the earliest AUC time points, variability may be higher, resulting in an overestimate of performance as seen across dynamic AUC plots of all stage models. This would explain the noticeable fluctuations before stabilization of the mean AUC value for each stage model. 
This initial window would also be the most dynamic point in time with respect to patient treatment selection, where the trial protocol outline determined the treatment stratification of patients into one of multiple arms and so was highly influential on overall survival. This initial unstable period approximately coincides with the worst-case trial length for patients from induction to the end of stage 4, roughly 230 days [<xref ref-type="bibr" rid="ref16">16</xref>]. Aside from effective sample sizes, it seems intuitive that the most sensitive prediction period of models trained on longitudinal data would be the initial time window after their measurements, as they more accurately reflect the real state of the patient, in the sense that there has been less time for longitudinal measurements to differ from their latest recorded value.</p><p>The ablation study stresses the importance of data held in the clinical dataset as well as <italic>FLT3</italic> and <italic>NPM1</italic> and longitudinal MRD records (metadata available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref><xref ref-type="supplementary-material" rid="app2"/>-<xref ref-type="supplementary-material" rid="app3">3</xref>), which were used for all the highest-ranked RSF models for each stage. The differences in ablated data sources will provide contextual pointers for future analysis of model features with traditional techniques (such as survival curves) or ML techniques (such as clustering), prioritizing features ranked most important by their respective model (<xref ref-type="fig" rid="figure1">Figure 1</xref>). 
Surprisingly, the NGS subset was only included in the postinduction stage RSF model, indicating that the strongest mutational markers came from the FLT3 and NPM1 dataset and karyotype information held in the clinical dataset, both of which remained consistently important across all stage CoxNet and RSF models, as seen in <xref ref-type="table" rid="table2">Table 2</xref>. NGS mutational biomarkers remained sparse per cohort and most likely effectively acted as noise to both models. This suggests further sensitivity analysis with a wider range of available features via composite NGS feature engineering. The preprocessing of NGS data in these experiments only used available gene mutation indicators to avoid catastrophic explosions in feature set dimensionality by the creation of composite variables. Some excluded features include tumor variant allele frequency and gene mutation base start and end locations, which are candidates for future analysis. A list of all available NGS features can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref><xref ref-type="supplementary-material" rid="app2"/>-<xref ref-type="supplementary-material" rid="app3">3</xref>.</p><p>Performance analysis of models constrained to data available up to trial stages approximately equates to performances in literature for a Cox proportional hazard model used for risk group stratification based on aggregate non&#x2013;time point constrained data [<xref ref-type="bibr" rid="ref26">26</xref>]. In the wider context of digital twins [<xref ref-type="bibr" rid="ref43">43</xref>], the results in performance in this study, particularly at the earlier stage time points, suggest that it is possible to provide accurate simulations on survival risk based on models trained at iterative stages of treatment. 
A digital twin would simulate multiple AML patient outcomes that are relevant to patient well-being; this invites further study on other time-based outcome predictions using the generalized method of this study, such as for QoL or comorbidity prediction.</p><p>A digital twin system with a core prediction layer using ML models should have access to longitudinal data with a temporal resolution optimal for AML-based predictions. Given that longitudinal records, such as MRD, were used by the highest performing stage models (excluding the initial postinduction stage), future work should also involve sensitivity analyses on model prediction accuracy using more frequent, intrastage measurements. This is a practically challenging area of research as trial-based data, such as AML17, even with longitudinal records, is often restricted to stage-wise updates which last several weeks or more depending on chemotherapy regimen. Such a system also demands an automated, digitalized data collection scheme, which is not the norm in trial protocols, where many records are paper-based. Currently existing ML models trained on stage-wise data, such as those shown in these experiments, indicate the promising practicality of their usage in clinical environments when used as the core stratification method within a digital twin system. However, the surrounding architecture necessary to feed models with accurate patient data with high temporal resolution is lacking. 
The total man-hours to preprocess trial-specific records and validate models with comparable features across trial or real-world data sources for such a system would greatly compromise its applicability, unless a standardized data capture process which records data in an ML model&#x2013;friendly manner (eg, with improved error handling&#x2014;particularly for critical date-time entries&#x2014;mandatory classification levels instead of clinician written note fields, stricter numerical field unit standardizations, a higher volume of QoL records, and higher resolution of longitudinal record entries) is developed for patients with AML. While RSF models can capture complex nonlinear relationships, they do not explicitly account for sequential dependencies; when higher resolution longitudinal data are available, researchers should evaluate the performance and feasibility of ML models designed to exploit temporal structures, such as recurrent or long short-term memory neural networks.</p><p>In the context of an AML digital twin, the RSF and CoxNet models presented here would act as the core prediction layer, updating patient-specific risk estimates whenever new measurements become available. As AML data are collected at discrete protocol-defined intervals (per trial treatment stages), this framework does not make use of real-time <italic>streamed</italic> data (which typically do not exist in trial-based datasets) but instead uses &#x201C;real-time upon update&#x201D; recalculation of risk as new clinical or MRD entries are recorded for a patient. Generalizability across clinical sites would be supported by a standardized preprocessing pipeline such as the one developed in this study. The monitoring of feature distribution drift (eg, via Population Stability Index) would allow for precise trigger points for model retraining when necessary. 
The planned external validation of RSF and CoxNet models as core digital twin predictive layers on AML18 and 19 datasets will further quantify cross-site robustness.</p></sec><sec id="s4-2"><title>Run Times</title><p>Model training and validation were computed on the Cardiff University &#x201C;Hawk&#x201D; high-performance computing cluster. Jobs were submitted via Simple Linux Utility for Resource Management, using 1 node, 1 task, and 14 CPU cores with the high-throughput partition. Processing time measurements show that RSF is much more intensive to validate than CoxNet, primarily due to the time complexity of the exhaustive feature importance method used&#x2014;permutation importance. For example, when comparing runtimes of the largest cohort and ablation set&#x2014;postinduction stage, with ablation components (clinical, MRD, NGS, <italic>FLT3</italic>, and <italic>NPM1</italic>)&#x2014;averaged across the 3 repeated, k=5 split CV loop, RSF took on average 147 seconds per fold, while CoxNet on average took 1 second. This difference is made more apparent after the more robust internal nested CV process, where the same RSF and CoxNet models and ablations took 7.9 hours and 1.4 hours, respectively, for the <italic>entire</italic> validation process across all folds. A caveat arises, particularly with the usage of the RSF model in clinical applications with large sample sizes; while predictions from trained models are near instantaneous, the full training and validation times of such models may be costly for critical patients relying on fast treatment delivery. In practice, it may be necessary to use CoxNet (or a similar &#x201C;fast&#x201D; baseline model) for predictions and counterfactual simulations as a preliminary tool while more sensitive, albeit slower models, such as RSF, are trained. With respect to identifying when models should be retrained, a quantifiable approach to determine the threshold should be calculated. 
For example, the Population Stability Index can be used to measure if there is a significant drift in a feature between the reference (trained) sample set and the new dataset. This would minimize redundancy and processing constraints in retraining by otherwise using an arbitrary threshold for determining when a model should be retrained, in turn minimizing potential performance or generalizability loss.</p></sec><sec id="s4-3"><title>Comparisons With Previous Work</title><p>Using a Cox proportional hazards model with ridge regression, Tazi et al [<xref ref-type="bibr" rid="ref26">26</xref>] recorded an IPCW c-index of approximately 0.7 with a combined total of 26 clinical, demographic, FLT3 (ITD), and other molecular class features aggregated from UK-NCRI, such as AML11, 12, 14, 15, 16, and 17 trials. Models constrained to data at each trial stage in this study have comparable IPCW c-index scores of around 0.69 using RSF. The number of features retained in this study after model reduction processes is variable per stage, highlighting the significance of specific features at select time points for TTE predictions. Many features consistently shared across stages are noted in existing ELN risk classification, and those not referenced indicate the potential importance of timing and informativeness from both administrative and testing fields. The total number of retained features is higher for each stage model than in the study by Tazi et al [<xref ref-type="bibr" rid="ref26">26</xref>], which used 26 features, as opposed to over 160 features across all RSF stage models. 
Future analysis will involve all features used at each stage in comparison with existing literature and guidelines for risk stratification, in addition to external validation to assess model generalizability.</p><p>RSF and CoxNet models outperform the AML17 protocol&#x2019;s Cox model for risk stratification based on c-index (see the &#x201C;Evaluation of the AML17 Protocol Model&#x201D; section and <xref ref-type="table" rid="table3">Table 3</xref>, showing c-index readings of AML17 Protocol Cox Linear Regression=0.66, CoxNet=0.68, and RSF=0.70), suggesting both implementations can more accurately predict TTE. However, CIs overlap, suggesting larger datasets may be necessary to conclude an absolute improvement in generalized performance.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>C-indices for each model trained on a larger cohort sample size of data available from the post-C1 stage.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">C-index (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">AML17<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> Protocol Cox Linear Regression</td><td align="left" valign="top">0.66 (0.63&#x2010;0.68)</td></tr><tr><td align="left" valign="top">CoxNet<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">0.68 (0.63&#x2010;0.73)</td></tr><tr><td align="left" valign="top">RSF<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">0.70 (0.64&#x2010;0.76)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AML17: United Kingdom National Cancer Research Institute Acute Myeloid Leukaemia 17 randomized controlled trial.</p></fn><fn id="table3fn2"><p><sup>b</sup>CoxNet: Cox proportional hazard regression with elastic net regularization.</p></fn><fn id="table3fn3"><p><sup>c</sup>RSF: random survival 
forest.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4-4"><title>Limitations</title><p>Since the completion of the AML17 trial, substantial progress has been made in patient therapy options through the identification of prognostic, predictive, and targetable molecular abnormalities [<xref ref-type="bibr" rid="ref19">19</xref>]. While the analysis of models used in this study provides insight into survival prediction performance using longitudinally restricted data, future work would benefit from the inclusion of more recent trial datasets using newly approved first-line treatment options, such as Midostaurin and CPX-351. Additionally, AML17 patient-reported outcomes, including QoL, were excluded from model training due to concerns on overall sample size within trial stage time points. The future inclusion of datasets with larger pools of patient-reported outcomes data is of particular interest for the prediction of additional outcome responses that may reflect on the social and psychological health of patients at different stages of disease treatment and progression. Further research into additional outcome predictions can be integrated as part of a generalized AML digital twin that can inform patients and provide accurate recommendations to health care practitioners, particularly where survival risk between treatments is marginal, but there are clear differences in secondary predictions, such as patients&#x2019; QoL.</p></sec><sec id="s4-5"><title>Conclusion</title><p>This study shows the practicality of time-to-survival-event predictions when training sets of CoxNet and RSF models, which are sequentially constricted to data measured up to the end of respective AML17 trial stages. The performance of these sequential TTE models is intended to justify their use as part of a wider digital twin system simulating multiple TTE outcomes for patients with AML. 
The primary c-index metric shows comparable scores to the literature that uses similar models on aggregate sets of similar trial data. Consistent and stable important features relative to each stage-specific model are supported by ELN literature on AML classification, and additional nonreferenced predictors suggest the importance of stage-specific administrative and timing fields. Additional cumulative-dynamic AUC and Brier loss metrics have been provided. The most immediate future work includes feature analysis of the best models at each stage, further comparison with existing risk group stratification guidelines such as ELN, external validation with follow-up AML18 and 19 trial programs, the implementation of a minimally adapted pipeline for different outcome measurements, and the inclusion of patient self-assessed QoL form records alongside a broader collection of longitudinal MRD data, which have been recorded after trial treatment stages as detailed in the AML17 protocol and follow-up AML18 and 19 trials.</p></sec></sec></body><back><ack><p>We thank the clinicians, research nurses, and laboratory scientists who enrolled patients and provided samples for the AML17 trial. We acknowledge and thank all the patients and families for their participation in, and support of, the trial.</p><p/><p>This research was undertaken using the supercomputing facilities at Cardiff University operated by Advanced Research Computing at Cardiff (ARCCA) on behalf of the Cardiff Supercomputing Facility and the Supercomputing Wales (SCW) project. 
We acknowledge the support of the latter, which is part-funded by the European Regional Development Fund (ERDF) via the Welsh Government.</p><p/></ack><notes><sec><title>Funding</title><p>The AML17 trial received research support from Cancer Research UK (CRUK/08/025, A29806). This work was supported by the Engineering and Physical Sciences Research Council Doctoral Training Partnership under grant EP/W524682.</p></sec><sec><title>Data Availability</title><p>The datasets analyzed during the current study are not publicly available due to the sensitive nature of individual participant clinical data, but are available from the trial sponsor on reasonable request. Access requires submission of a research proposal, a Statistical Analysis Plan, and execution of a Data Sharing Agreement. Requests can be made by contacting the UK-NCRI AML17 trial sponsor via ctr@cardiff.ac.uk or through the Centre for Trials Research data request webpage [<xref ref-type="bibr" rid="ref66">66</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AML</term><def><p>acute myeloid leukemia</p></def></def-item><def-item><term id="abb2">AML17</term><def><p>United Kingdom National Cancer Research Institute Acute Myeloid Leukaemia 17 randomized controlled trial</p></def></def-item><def-item><term id="abb3">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb4">c-index</term><def><p>concordance index</p></def></def-item><def-item><term id="abb5">CoxNet</term><def><p>Cox proportional hazard regression with elastic net regularization</p></def></def-item><def-item><term id="abb6">CPHR</term><def><p>Cox proportional hazard regression</p></def></def-item><def-item><term id="abb7">CV</term><def><p>cross-validation</p></def></def-item><def-item><term id="abb8">ELN</term><def><p>European 
LeukemiaNet</p></def></def-item><def-item><term id="abb9">FLT3</term><def><p>Feline McDonough sarcoma-Like Tyrosine kinase 3</p></def></def-item><def-item><term id="abb10">IPCW</term><def><p>inverse probability of censoring weights</p></def></def-item><def-item><term id="abb11">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb12">MRD</term><def><p>minimal residual disease</p></def></def-item><def-item><term id="abb13">NGS</term><def><p>next-generation sequencing</p></def></def-item><def-item><term id="abb14">NPM1</term><def><p>Nucleophosmin 1</p></def></def-item><def-item><term id="abb15">QoL</term><def><p>quality of life</p></def></def-item><def-item><term id="abb16">RSF</term><def><p>random survival forest</p></def></def-item><def-item><term id="abb17">TTE</term><def><p>time-to-event</p></def></def-item><def-item><term id="abb18">UK-NCRI</term><def><p>United Kingdom National Cancer Research Institute</p></def></def-item><def-item><term id="abb19">WHO</term><def><p>World Health Organization</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Cancers by body location/system</article-title><source>National Cancer Institute</source><access-date>2025-03-25</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancer.gov/types/by-body-location">https://www.cancer.gov/types/by-body-location</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saultz</surname><given-names>JN</given-names> </name><name name-style="western"><surname>Garzon</surname><given-names>R</given-names> </name></person-group><article-title>Acute myeloid leukemia: a concise review</article-title><source>J Clin 
Med</source><year>2016</year><month>03</month><day>5</day><volume>5</volume><issue>3</issue><fpage>33</fpage><pub-id pub-id-type="doi">10.3390/jcm5030033</pub-id><pub-id pub-id-type="medline">26959069</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yoon</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Hong</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Characterization and prognosis of secondary acute myeloid leukemia in an Asian population: AML with antecedent hematological disease confers worst outcomes, irrespective of cytogenetic risk</article-title><source>Anticancer Res</source><year>2020</year><month>05</month><volume>40</volume><issue>5</issue><fpage>2917</fpage><lpage>2924</lpage><pub-id pub-id-type="doi">10.21873/anticanres.14269</pub-id><pub-id pub-id-type="medline">32366443</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baruchel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bourquin</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Crispino</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Down syndrome and leukemia: from basic mechanisms to clinical advances</article-title><source>Haematologica</source><year>2023</year><month>07</month><volume>108</volume><issue>10</issue><fpage>2570</fpage><lpage>2581</lpage><pub-id pub-id-type="doi">10.3324/haematol.2023.283225</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Alter</surname><given-names>BP</given-names> </name></person-group><article-title>Fanconi anemia and the development of leukemia</article-title><source>Best Pract Res Clin Haematol</source><year>2014</year><volume>27</volume><issue>3-4</issue><fpage>214</fpage><lpage>221</lpage><pub-id pub-id-type="doi">10.1016/j.beha.2014.10.002</pub-id><pub-id pub-id-type="medline">25455269</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sportoletti</surname><given-names>P</given-names> </name><name name-style="western"><surname>Grisendi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Majid</surname><given-names>SM</given-names> </name><etal/></person-group><article-title>Npm1 is a haploinsufficient suppressor of myeloid and lymphoid malignancies in the mouse</article-title><source>Blood</source><year>2008</year><month>04</month><day>1</day><volume>111</volume><issue>7</issue><fpage>3859</fpage><lpage>3862</lpage><pub-id pub-id-type="doi">10.1182/blood-2007-06-098251</pub-id><pub-id pub-id-type="medline">18212245</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Donner</surname><given-names>L</given-names> </name><name name-style="western"><surname>Fedele</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Garon</surname><given-names>CF</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Sherr</surname><given-names>CJ</given-names> </name></person-group><article-title>McDonough feline sarcoma virus: characterization of the molecularly cloned provirus and its feline oncogene (v-fms)</article-title><source>J 
Virol</source><year>1982</year><month>02</month><volume>41</volume><issue>2</issue><fpage>489</fpage><lpage>500</lpage><pub-id pub-id-type="doi">10.1128/JVI.41.2.489-500.1982</pub-id><pub-id pub-id-type="medline">6281462</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kiyoi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kawashima</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ishikawa</surname><given-names>Y</given-names> </name></person-group><article-title>FLT3 mutations in acute myeloid leukemia: therapeutic paradigm beyond inhibitor development</article-title><source>Cancer Sci</source><year>2020</year><month>02</month><volume>111</volume><issue>2</issue><fpage>312</fpage><lpage>322</lpage><pub-id pub-id-type="doi">10.1111/cas.14274</pub-id><pub-id pub-id-type="medline">31821677</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shallis</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Weiss</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Deziel</surname><given-names>NC</given-names> </name><name name-style="western"><surname>Gore</surname><given-names>SD</given-names> </name></person-group><article-title>A clandestine culprit with critical consequences: benzene and acute myeloid leukemia</article-title><source>Blood Rev</source><year>2021</year><month>05</month><volume>47</volume><fpage>100736</fpage><pub-id pub-id-type="doi">10.1016/j.blre.2020.100736</pub-id><pub-id pub-id-type="medline">32771228</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Fircanis</surname><given-names>S</given-names> </name><name name-style="western"><surname>Merriam</surname><given-names>P</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Castillo</surname><given-names>JJ</given-names> </name></person-group><article-title>The relation between cigarette smoking and risk of acute myeloid leukemia: an updated meta-analysis of epidemiological studies</article-title><source>Am J Hematol</source><year>2014</year><month>08</month><volume>89</volume><issue>8</issue><fpage>E125</fpage><lpage>E132</lpage><pub-id pub-id-type="doi">10.1002/ajh.23744</pub-id><pub-id pub-id-type="medline">24753145</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Strickland</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Vey</surname><given-names>N</given-names> </name></person-group><article-title>Diagnosis and treatment of therapy-related acute myeloid leukemia</article-title><source>Crit Rev Oncol Hematol</source><year>2022</year><month>03</month><volume>171</volume><fpage>103607</fpage><pub-id pub-id-type="doi">10.1016/j.critrevonc.2022.103607</pub-id><pub-id pub-id-type="medline">35101585</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Desai</surname><given-names>RH</given-names> </name><name name-style="western"><surname>Zandvakili</surname><given-names>N</given-names> </name><name name-style="western"><surname>Bohlander</surname><given-names>SK</given-names> </name></person-group><article-title>Dissecting the genetic and non-genetic heterogeneity of acute myeloid leukemia using next-generation sequencing and in vivo 
models</article-title><source>Cancers (Basel)</source><year>2022</year><month>04</month><day>27</day><volume>14</volume><issue>9</issue><fpage>2182</fpage><pub-id pub-id-type="doi">10.3390/cancers14092182</pub-id><pub-id pub-id-type="medline">35565315</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>D&#x00F6;hner</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Appelbaum</surname><given-names>FR</given-names> </name><etal/></person-group><article-title>Diagnosis and management of AML in adults: 2022 recommendations from an international expert panel on behalf of the ELN</article-title><source>Blood</source><year>2022</year><month>09</month><day>22</day><volume>140</volume><issue>12</issue><fpage>1345</fpage><lpage>1377</lpage><pub-id pub-id-type="doi">10.1182/blood.2022016867</pub-id><pub-id pub-id-type="medline">35797463</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arber</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Orazi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hasserjian</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The 2016 revision to the World Health Organization classification of myeloid neoplasms and acute leukemia</article-title><source>Blood</source><year>2016</year><month>05</month><day>19</day><volume>127</volume><issue>20</issue><fpage>2391</fpage><lpage>2405</lpage><pub-id pub-id-type="doi">10.1182/blood-2016-03-643544</pub-id><pub-id pub-id-type="medline">27069254</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qin</surname><given-names>D</given-names> </name></person-group><article-title>Next-generation sequencing and its clinical application</article-title><source>Cancer Biol Med</source><year>2019</year><month>02</month><volume>16</volume><issue>1</issue><fpage>4</fpage><lpage>10</lpage><pub-id pub-id-type="doi">10.20892/j.issn.2095-3941.2018.0055</pub-id><pub-id pub-id-type="medline">31119042</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="web"><article-title>AML 17 protocol for patients aged under 60</article-title><source>Cardiff University Centre for Trials Research</source><access-date>2024-10-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://trials.cardiff.ac.uk/aml/17/web/files/new3/AML%2017%20Protocol%20June11%20v7.1%20.pdf">https://trials.cardiff.ac.uk/aml/17/web/files/new3/AML%2017%20Protocol%20June11%20v7.1%20.pdf</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><article-title>AML17</article-title><source>Cardiff University | Centre for Trials Research</source><access-date>2024-10-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cardiff.ac.uk/centre-for-trials-research/research/studies-and-trials/view/aml17">https://www.cardiff.ac.uk/centre-for-trials-research/research/studies-and-trials/view/aml17</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Burnett</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Russell</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Hills</surname><given-names>RK</given-names> </name><etal/></person-group><article-title>A randomized comparison of daunorubicin 90 mg/m2 vs 60 
mg/m2 in AML induction: results from the UK NCRI AML17 trial in 1206 patients</article-title><source>Blood</source><year>2015</year><month>06</month><day>18</day><volume>125</volume><issue>25</issue><fpage>3878</fpage><lpage>3885</lpage><pub-id pub-id-type="doi">10.1182/blood-2015-01-623447</pub-id><pub-id pub-id-type="medline">25833957</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kantarjian</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kadia</surname><given-names>T</given-names> </name><name name-style="western"><surname>DiNardo</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Acute myeloid leukemia: current progress and future directions</article-title><source>Blood Cancer J</source><year>2021</year><month>02</month><day>22</day><volume>11</volume><issue>2</issue><fpage>41</fpage><pub-id pub-id-type="doi">10.1038/s41408-021-00425-3</pub-id><pub-id pub-id-type="medline">33619261</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><article-title>Cancer stat facts: leukemia &#x2014; acute myeloid leukemia (AML)</article-title><source>National Cancer Institute &#x2014; Surveillance, Epidemiology and End Results Program</source><access-date>2024-10-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://seer.cancer.gov/statfacts/html/amyl.html">https://seer.cancer.gov/statfacts/html/amyl.html</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boscaro</surname><given-names>E</given-names> </name><name name-style="western"><surname>Urbino</surname><given-names>I</given-names> </name><name name-style="western"><surname>Catania</surname><given-names>FM</given-names> 
</name><etal/></person-group><article-title>Modern risk stratification of acute myeloid leukemia in 2023: integrating established and emerging prognostic factors</article-title><source>Cancers (Basel)</source><year>2023</year><month>07</month><day>6</day><volume>15</volume><issue>13</issue><fpage>3512</fpage><pub-id pub-id-type="doi">10.3390/cancers15133512</pub-id><pub-id pub-id-type="medline">37444622</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><article-title>Acute myeloid leukaemia (AML) statistics</article-title><source>Cancer Research UK</source><access-date>2024-05-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerresearchuk.org/health-professional/cancer-statistics/statistics-by-cancer-type/leukaemia-aml">https://www.cancerresearchuk.org/health-professional/cancer-statistics/statistics-by-cancer-type/leukaemia-aml</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yaqoob</surname><given-names>A</given-names> </name><name name-style="western"><surname>Musheer Aziz</surname><given-names>R</given-names> </name><name name-style="western"><surname>Verma</surname><given-names>NK</given-names> </name></person-group><article-title>Applications and techniques of machine learning in cancer classification: a systematic review</article-title><source>Hum-Cent Intell Syst</source><year>2023</year><month>12</month><volume>3</volume><issue>4</issue><fpage>588</fpage><lpage>615</lpage><pub-id pub-id-type="doi">10.1007/s44230-023-00041-3</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name 
name-style="western"><surname>Chi</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Survival prediction optimization of acute myeloid leukemia based on T-cell function-related genes and plasma proteins</article-title><source>Blood</source><year>2022</year><month>11</month><day>15</day><volume>140</volume><issue>Supplement 1</issue><fpage>6300</fpage><lpage>6302</lpage><pub-id pub-id-type="doi">10.1182/blood-2022-163201</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gal</surname><given-names>O</given-names> </name><name name-style="western"><surname>Auslander</surname><given-names>N</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Meerzaman</surname><given-names>D</given-names> </name></person-group><article-title>Predicting complete remission of acute myeloid leukemia: machine learning applied to gene expression</article-title><source>Cancer Inform</source><year>2019</year><volume>18</volume><fpage>1176935119835544</fpage><pub-id pub-id-type="doi">10.1177/1176935119835544</pub-id><pub-id pub-id-type="medline">30911218</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tazi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Arango-Ossa</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Unified classification and risk-stratification in acute myeloid leukemia</article-title><source>Nat Commun</source><year>2022</year><month>08</month><day>8</day><volume>13</volume><issue>1</issue><fpage>4622</fpage><pub-id 
pub-id-type="doi">10.1038/s41467-022-32103-8</pub-id><pub-id pub-id-type="medline">35941135</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>AML18</article-title><source>Cardiff University | Centre for Trials Research</source><access-date>2024-05-09</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cardiff.ac.uk/centre-for-trials-research/research/studies-and-trials/view/aml18">https://www.cardiff.ac.uk/centre-for-trials-research/research/studies-and-trials/view/aml18</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Versluis</surname><given-names>J</given-names> </name><name name-style="western"><surname>Metzner</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Risk stratification in older intensively treated patients with AML</article-title><source>J Clin Oncol</source><year>2024</year><month>12</month><volume>42</volume><issue>34</issue><fpage>4084</fpage><lpage>4094</lpage><pub-id pub-id-type="doi">10.1200/JCO.23.02631</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vock</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Wolfson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bandyopadhyay</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Adapting machine learning techniques to censored time-to-event health record data: a general-purpose approach using inverse probability of censoring weighting</article-title><source>J Biomed 
Inform</source><year>2016</year><month>06</month><volume>61</volume><fpage>119</fpage><lpage>131</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2016.03.009</pub-id><pub-id pub-id-type="medline">26992568</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Suresh</surname><given-names>K</given-names> </name><name name-style="western"><surname>Severn</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ghosh</surname><given-names>D</given-names> </name></person-group><article-title>Survival prediction models: an introduction to discrete-time modeling</article-title><source>BMC Med Res Methodol</source><year>2022</year><month>07</month><day>26</day><volume>22</volume><issue>1</issue><fpage>207</fpage><pub-id pub-id-type="doi">10.1186/s12874-022-01679-6</pub-id><pub-id pub-id-type="medline">35883032</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Breiman</surname><given-names>L</given-names> </name></person-group><article-title>Random forests</article-title><source>Mach Learn</source><year>2001</year><month>10</month><volume>45</volume><issue>1</issue><fpage>5</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ishwaran</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kogalur</surname><given-names>UB</given-names> </name><name name-style="western"><surname>Blackstone</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Lauer</surname><given-names>MS</given-names> </name></person-group><article-title>Random survival 
forests</article-title><source>Ann Appl Stat</source><year>2008</year><month>09</month><volume>2</volume><issue>3</issue><fpage>841</fpage><lpage>860</lpage><pub-id pub-id-type="doi">10.1214/08-AOAS169</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>Y</given-names> </name></person-group><article-title>Statistical methods for analyzing right-censored length-biased data under Cox model</article-title><source>Biometrics</source><year>2010</year><month>06</month><volume>66</volume><issue>2</issue><fpage>382</fpage><lpage>392</lpage><pub-id pub-id-type="doi">10.1111/j.1541-0420.2009.01287.x</pub-id><pub-id pub-id-type="medline">19522872</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gui</surname><given-names>J</given-names> </name><name name-style="western"><surname>Li</surname><given-names>H</given-names> </name></person-group><article-title>Penalized Cox regression analysis in the high-dimensional and low-sample size settings, with applications to microarray gene expression data</article-title><source>Bioinformatics</source><year>2005</year><month>07</month><day>1</day><volume>21</volume><issue>13</issue><fpage>3001</fpage><lpage>3008</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/bti422</pub-id><pub-id pub-id-type="medline">15814556</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hastie</surname><given-names>T</given-names> </name></person-group><article-title>Regularization and variable selection via 
the elastic net</article-title><source>Journal of the Royal Statistical Society Series B</source><year>2005</year><month>04</month><day>1</day><volume>67</volume><issue>2</issue><fpage>301</fpage><lpage>320</lpage><pub-id pub-id-type="doi">10.1111/j.1467-9868.2005.00503.x</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leger</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zwanenburg</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pilz</surname><given-names>K</given-names> </name><etal/></person-group><article-title>A comparative study of machine learning methods for time-to-event survival data for radiomics risk modelling</article-title><source>Sci Rep</source><year>2017</year><month>10</month><day>16</day><volume>7</volume><issue>1</issue><fpage>13206</fpage><pub-id pub-id-type="doi">10.1038/s41598-017-13448-3</pub-id><pub-id pub-id-type="medline">29038455</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adeoye</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hui</surname><given-names>L</given-names> </name><name name-style="western"><surname>Koohi-Moghadam</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Thomson</surname><given-names>P</given-names> </name></person-group><article-title>Comparison of time-to-event machine learning models in predicting oral cavity cancer prognosis</article-title><source>Int J Med Inform</source><year>2022</year><month>01</month><volume>157</volume><fpage>104635</fpage><pub-id 
pub-id-type="doi">10.1016/j.ijmedinf.2021.104635</pub-id><pub-id pub-id-type="medline">34800847</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kurt Omurlu</surname><given-names>I</given-names> </name><name name-style="western"><surname>Ture</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tokatli</surname><given-names>F</given-names> </name></person-group><article-title>The comparisons of random survival forests and Cox regression analysis with simulation and an application related to breast cancer</article-title><source>Expert Syst Appl</source><year>2009</year><month>05</month><volume>36</volume><issue>4</issue><fpage>8582</fpage><lpage>8588</lpage><pub-id pub-id-type="doi">10.1016/j.eswa.2008.10.023</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cygu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Seow</surname><given-names>H</given-names> </name><name name-style="western"><surname>Dushoff</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bolker</surname><given-names>BM</given-names> </name></person-group><article-title>Comparing machine learning approaches to incorporate time-varying covariates in predicting cancer survival time</article-title><source>Sci Rep</source><year>2023</year><month>01</month><day>25</day><volume>13</volume><issue>1</issue><fpage>1370</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-28393-7</pub-id><pub-id pub-id-type="medline">36697455</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aivaliotis</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Palczewski</surname><given-names>J</given-names> </name><name name-style="western"><surname>Atkinson</surname><given-names>R</given-names> </name><name name-style="western"><surname>Cade</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>MA</given-names> </name></person-group><article-title>A comparison of time to event analysis methods, using weight status and breast cancer as a case study</article-title><source>Sci Rep</source><year>2021</year><month>07</month><day>7</day><volume>11</volume><issue>1</issue><fpage>14058</fpage><pub-id pub-id-type="doi">10.1038/s41598-021-92944-z</pub-id><pub-id pub-id-type="medline">34234154</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qiu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name><etal/></person-group><article-title>A comparison study of machine learning (random survival forest) and classic statistic (Cox proportional hazards) for predicting progression in high-grade glioma after proton and carbon ion radiotherapy</article-title><source>Front Oncol</source><year>2020</year><volume>10</volume><fpage>551420</fpage><pub-id pub-id-type="doi">10.3389/fonc.2020.551420</pub-id><pub-id pub-id-type="medline">33194609</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pickett</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Suresh</surname><given-names>K</given-names> </name><name name-style="western"><surname>Campbell</surname><given-names>KR</given-names> </name><name 
name-style="western"><surname>Davis</surname><given-names>S</given-names> </name><name name-style="western"><surname>Juarez-Colunga</surname><given-names>E</given-names> </name></person-group><article-title>Random survival forests for dynamic predictions of a time-to-event outcome using a longitudinal biomarker</article-title><source>BMC Med Res Methodol</source><year>2021</year><month>10</month><day>17</day><volume>21</volume><issue>1</issue><fpage>216</fpage><pub-id pub-id-type="doi">10.1186/s12874-021-01375-x</pub-id><pub-id pub-id-type="medline">34657597</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qi</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Tao</surname><given-names>F</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Enabling technologies and tools for digital twin</article-title><source>Journal of Manufacturing Systems</source><year>2021</year><month>01</month><volume>58</volume><fpage>3</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1016/j.jmsy.2019.10.001</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garciaz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hospital</surname><given-names>MA</given-names> </name></person-group><article-title>FMS-like Tyrosine Kinase 3 inhibitors in the treatment of acute myeloid leukemia: an update on the emerging evidence and safety profile</article-title><source>Onco Targets Ther</source><year>2023</year><volume>16</volume><fpage>31</fpage><lpage>45</lpage><pub-id pub-id-type="doi">10.2147/OTT.S236740</pub-id><pub-id pub-id-type="medline">36698434</pub-id></nlm-citation></ref><ref 
id="ref45"><label>45</label><nlm-citation citation-type="web"><article-title>Hawk Cardiff Research Datastore (RDS) access VM</article-title><source>Supercomputing Wales Portal</source><access-date>2025-04-08</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://portal.supercomputing.wales/index.php/hawk-cardiff-research-datastore-rds-access-vm/">https://portal.supercomputing.wales/index.php/hawk-cardiff-research-datastore-rds-access-vm/</ext-link></comment></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bennett</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Catovsky</surname><given-names>D</given-names> </name><name name-style="western"><surname>Daniel</surname><given-names>MT</given-names> </name><etal/></person-group><article-title>Proposals for the classification of the acute leukaemias. French-American-British (FAB) co-operative group</article-title><source>Br J Haematol</source><year>1976</year><month>08</month><volume>33</volume><issue>4</issue><fpage>451</fpage><lpage>458</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2141.1976.tb03563.x</pub-id><pub-id pub-id-type="medline">188440</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="web"><article-title>ECOG performance status scale</article-title><source>ECOG-ACRIN Cancer Research Group</source><access-date>2025-03-08</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ecog-acrin.org/resources/ecog-performance-status/">https://ecog-acrin.org/resources/ecog-performance-status/</ext-link></comment></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="web"><article-title>Centre for Trials Research | Home</article-title><source>Cardiff University</source><access-date>2024-10-12</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.cardiff.ac.uk/centre-for-trials-research">https://www.cardiff.ac.uk/centre-for-trials-research</ext-link></comment></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ying</surname><given-names>X</given-names> </name></person-group><article-title>An overview of overfitting and its solutions</article-title><source>J Phys: Conf Ser</source><year>2019</year><month>03</month><day>1</day><volume>1168</volume><issue>2</issue><fpage>022022</fpage><pub-id pub-id-type="doi">10.1088/1742-6596/1168/2/022022</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lauriola</surname><given-names>I</given-names> </name><name name-style="western"><surname>Lavelli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Aiolli</surname><given-names>F</given-names> </name></person-group><article-title>An introduction to deep learning in natural language processing: models, techniques, and tools</article-title><source>Neurocomputing</source><year>2022</year><month>01</month><volume>470</volume><fpage>443</fpage><lpage>456</lpage><pub-id pub-id-type="doi">10.1016/j.neucom.2021.05.103</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="web"><article-title>Pandas.DataFrame</article-title><source>pandas</source><access-date>2024-10-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html">https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html</ext-link></comment></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="web"><article-title>StandardScaler</article-title><source>scikit-learn</source><access-date>2024-10-12</access-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html">https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html</ext-link></comment></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="web"><article-title>General concepts</article-title><source>The Open Group</source><access-date>2024-10-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_16">https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_16</ext-link></comment></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Amorim</surname><given-names>LBV</given-names> </name><name name-style="western"><surname>Cavalcanti</surname><given-names>GDC</given-names> </name><name name-style="western"><surname>Cruz</surname><given-names>RMO</given-names> </name></person-group><article-title>The choice of scaling technique matters for classification performance</article-title><source>Appl Soft Comput</source><year>2023</year><month>01</month><volume>133</volume><fpage>109924</fpage><pub-id pub-id-type="doi">10.1016/j.asoc.2022.109924</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Altmann</surname><given-names>A</given-names> </name><name name-style="western"><surname>Tolo&#x015F;i</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sander</surname><given-names>O</given-names> </name><name name-style="western"><surname>Lengauer</surname><given-names>T</given-names> </name></person-group><article-title>Permutation importance: a corrected feature importance 
measure</article-title><source>Bioinformatics</source><year>2010</year><month>05</month><day>15</day><volume>26</volume><issue>10</issue><fpage>1340</fpage><lpage>1347</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btq134</pub-id><pub-id pub-id-type="medline">20385727</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Belle</surname><given-names>V</given-names> </name><name name-style="western"><surname>Pelckmans</surname><given-names>K</given-names> </name><name name-style="western"><surname>Suykens</surname><given-names>J</given-names> </name><name name-style="western"><surname>Huffel</surname><given-names>S</given-names> </name></person-group><conf-name>Survival SVM: a practical scalable algorithm</conf-name><conf-date>Apr 23-25, 2008</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.esann.org/sites/default/files/proceedings/legacy/es2008-95.pdf">https://www.esann.org/sites/default/files/proceedings/legacy/es2008-95.pdf</ext-link></comment></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="web"><article-title>Sksurv.svm.fastsurvivalsvm</article-title><source>scikit-survival 0.27.1</source><access-date>2025-03-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://scikit-survival.readthedocs.io/en/stable/api/generated/sksurv.svm.FastSurvivalSVM.html">https://scikit-survival.readthedocs.io/en/stable/api/generated/sksurv.svm.FastSurvivalSVM.html</ext-link></comment></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Russell</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Burnett</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Hills</surname><given-names>RK</given-names> 
</name><etal/></person-group><article-title>Long term follow up from the NCRI AML17 trial of attenuated arsenic trioxide and ATRA therapy for newly diagnosed and relapsed acute promyelocytic leukaemia</article-title><source>Blood</source><year>2016</year><month>12</month><day>2</day><volume>128</volume><issue>22</issue><fpage>897</fpage><lpage>897</lpage><pub-id pub-id-type="doi">10.1182/blood.V128.22.897.897</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Geloven</surname><given-names>N</given-names> </name><name name-style="western"><surname>He</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zwinderman</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Putter</surname><given-names>H</given-names> </name></person-group><article-title>Estimation of incident dynamic AUC in practice</article-title><source>Comput Stat Data Anal</source><year>2021</year><month>02</month><volume>154</volume><fpage>107095</fpage><pub-id pub-id-type="doi">10.1016/j.csda.2020.107095</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Napierala</surname><given-names>K</given-names> </name><name name-style="western"><surname>Stefanowski</surname><given-names>J</given-names> </name></person-group><article-title>Types of minority class examples and their influence on learning classifiers from imbalanced data</article-title><source>J Intell Inf Syst</source><year>2016</year><month>06</month><volume>46</volume><issue>3</issue><fpage>563</fpage><lpage>597</lpage><pub-id pub-id-type="doi">10.1007/s10844-015-0368-1</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="web"><article-title>Datasets: imbalanced 
datasets</article-title><source>&#x2018;Machine Learning&#x2019;, Google for Developers</source><access-date>2025-01-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://developers.google.com/machine-learning/crash-course/overfitting/imbalanced-datasets">https://developers.google.com/machine-learning/crash-course/overfitting/imbalanced-datasets</ext-link></comment></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chawla</surname><given-names>NV</given-names> </name><name name-style="western"><surname>Bowyer</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>LO</given-names> </name><name name-style="western"><surname>Kegelmeyer</surname><given-names>WP</given-names> </name></person-group><article-title>SMOTE: synthetic minority over-sampling technique</article-title><source>J Artif Intell Res</source><year>2002</year><month>06</month><volume>16</volume><fpage>321</fpage><lpage>357</lpage><pub-id pub-id-type="doi">10.1613/jair.953</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="web"><article-title>Sksurv.metrics.concordance_index_ipcw</article-title><source>scikit-survival 0.27.0</source><access-date>2025-03-11</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://scikit-survival.readthedocs.io/en/stable/api/generated/sksurv.metrics.concordance_index_ipcw.html">https://scikit-survival.readthedocs.io/en/stable/api/generated/sksurv.metrics.concordance_index_ipcw.html</ext-link></comment></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feng</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Impact of ELN risk stratification, induction chemotherapy regimens and hematopoietic stem cell transplantation on outcomes in hyperleukocytic acute myeloid leukemia with initial white blood cell count more than 100 &#x00D7; 10<sup>9</sup>/L</article-title><source>Cancer Manag Res</source><year>2019</year><volume>11</volume><fpage>9495</fpage><lpage>9503</lpage><pub-id pub-id-type="doi">10.2147/CMAR.S225123</pub-id><pub-id pub-id-type="medline">31807075</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Juliusson</surname><given-names>G</given-names> </name><name name-style="western"><surname>J&#x00E4;dersten</surname><given-names>M</given-names> </name><name name-style="western"><surname>Deneberg</surname><given-names>S</given-names> </name><etal/></person-group><article-title>The prognostic impact of FLT3-ITD and NPM1 mutation in adult AML is age-dependent in the population-based setting</article-title><source>Blood Adv</source><year>2020</year><month>03</month><day>24</day><volume>4</volume><issue>6</issue><fpage>1094</fpage><lpage>1101</lpage><pub-id pub-id-type="doi">10.1182/bloodadvances.2019001335</pub-id><pub-id pub-id-type="medline">32203582</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="web"><article-title>Centre for trials research - data requests</article-title><source>Cardiff University</source><access-date>2026-04-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cardiff.ac.uk/centre-for-trials-research/collaborate-with-us/data-requests/">https://www.cardiff.ac.uk/centre-for-trials-research/collaborate-with-us/data-requests/</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia 
Appendix 1</label><p>Clinical data dictionary.</p><media xlink:href="bioinform_v7i1e75678_app1.xlsx" xlink:title="XLSX File, 48 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>MRD data dictionary.</p><media xlink:href="bioinform_v7i1e75678_app2.xlsx" xlink:title="XLSX File, 28 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>FLT3NPM1 data dictionary.</p><media xlink:href="bioinform_v7i1e75678_app3.xlsx" xlink:title="XLSX File, 18 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>NGS metadata.</p><media xlink:href="bioinform_v7i1e75678_app4.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Feature exclusion summary.</p><media xlink:href="bioinform_v7i1e75678_app5.xlsx" xlink:title="XLSX File, 15 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Cox proportional hazard regression and regularization techniques.</p><media xlink:href="bioinform_v7i1e75678_app6.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>Random survival forest.</p><media xlink:href="bioinform_v7i1e75678_app7.docx" xlink:title="DOCX File, 18 KB"/></supplementary-material><supplementary-material id="app8"><label>Multimedia Appendix 8</label><p>All relevant Python files used for preprocessing, building, and validation of RSF/CoxNet models.</p><media xlink:href="bioinform_v7i1e75678_app8.zip" xlink:title="ZIP File, 53 KB"/></supplementary-material><supplementary-material id="app9"><label>Multimedia Appendix 9</label><p>Feature importance values and c-index scores for each ablated model across the repeated k-fold cross-validation from the ablation study phase.</p><media xlink:href="bioinform_v7i1e75678_app9.zip" 
xlink:title="ZIP File, 17639 KB"/></supplementary-material><supplementary-material id="app10"><label>Multimedia Appendix 10</label><p>Per-feature missingness correlation heatmaps and missingness frequency bar charts across AML17 treatment stage cohorts for each data source subset used to train RSF and CoxNet models.</p><media xlink:href="bioinform_v7i1e75678_app10.zip" xlink:title="ZIP File, 4906 KB"/></supplementary-material><supplementary-material id="app11"><label>Multimedia Appendix 11</label><p>Contains (1) feature importance results for final treatment stage-wise model builds both before and after feature reduction, (2) run-time logs of final CoxNet and RSF models for each fold of nested cross-validation, (3) full resolution dynamic AUC and dynamic Brier loss plots for final stage models, and (4) a JSON file of average values across all bootstrapped samples of each fold for all evaluation metrics: c-index, IPCW c-index, dynamic Brier loss, and dynamic AUC.</p><media xlink:href="bioinform_v7i1e75678_app11.zip" xlink:title="ZIP File, 13739 KB"/></supplementary-material></app-group></back></article>