<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Bioinform Biotech</journal-id><journal-id journal-id-type="publisher-id">bioinform</journal-id><journal-id journal-id-type="index">19</journal-id><journal-title>JMIR Bioinformatics and Biotechnology</journal-title><abbrev-journal-title>JMIR Bioinform Biotech</abbrev-journal-title><issn pub-type="epub">2563-3570</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v6i1e76553</article-id><article-id pub-id-type="doi">10.2196/76553</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Conversational Artificial Intelligence for Integrating Social Determinants, Genomics, and Clinical Data in Precision Medicine: Development and Implementation Study of the AI-HOPE-PM System</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Yang</surname><given-names>Ei-Wen</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Waldrup</surname><given-names>Brigette</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>Enrique</given-names></name><degrees>MPH, MS, MD, 
PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Polyagent</institution><addr-line>San Francisco</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Beckman Research Institute</institution><addr-line>Duarte</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff3"><institution>City Of Hope National Medical Center</institution><addr-line>1500 E Duarte Road</addr-line><addr-line>Duarte, CA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Uzun</surname><given-names>Ece</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Antwi</surname><given-names>Henry Asante</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Khasnavis</surname><given-names>Nithisha</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Enrique Velazquez-Villarreal, MPH, MS, MD, PhD, City Of Hope National Medical Center, 1500 E Duarte Road, Duarte, CA, 91010, United States, 1 6262187162; <email>evelazquezvilla@coh.org</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>10</day><month>10</month><year>2025</year></pub-date><volume>6</volume><elocation-id>e76553</elocation-id><history><date date-type="received"><day>25</day><month>04</month><year>2025</year></date><date date-type="rev-recd"><day>23</day><month>06</month><year>2025</year></date><date date-type="accepted"><day>30</day><month>08</month><year>2025</year></date></history><copyright-statement>&#x00A9; Ei-Wen Yang, 
Brigette Waldrup, Enrique Velazquez-Villarreal. Originally published in JMIR Bioinformatics and Biotechnology (<ext-link ext-link-type="uri" xlink:href="https://bioinform.jmir.org">https://bioinform.jmir.org</ext-link>), 10.10.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">http://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Bioinformatics and Biotechnology, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://bioinform.jmir.org/">https://bioinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://bioinform.jmir.org/2025/1/e76553"/><abstract><sec><title>Background</title><p>Integrating clinical, genomic, and social determinants of health (SDOH) data is essential for advancing precision medicine and addressing cancer health disparities. However, existing bioinformatics tools often lack the flexibility to perform equity-driven analyses or require significant programming expertise.</p></sec><sec><title>Objective</title><p>We developed AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics), a conversational artificial intelligence system designed to enable natural language&#x2013;driven, multidimensional cancer analysis. 
This study describes the development, implementation, and application of AI-HOPE-PM to support hypothesis testing that integrates genomic, clinical, and SDOH data.</p></sec><sec sec-type="methods"><title>Methods</title><p>AI-HOPE-PM leverages large language models and Python-based statistical scripts to convert user-defined natural language queries into executable workflows. It was evaluated using curated colorectal cancer datasets from The Cancer Genome Atlas and cBioPortal, enriched with harmonized SDOH variables. Accuracy of natural language interpretation, run time efficiency, and usability were benchmarked against cBioPortal and UCSC Xena.</p></sec><sec sec-type="results"><title>Results</title><p>AI-HOPE-PM successfully supported case-control stratification, survival modeling, and odds ratio analysis using natural language prompts. In colorectal cancer case studies, the system revealed significant disparities in progression-free survival and treatment access based on financial strain, health care access, food insecurity, and social support, demonstrating the importance of integrating SDOH in cancer research. Benchmark testing showed faster task execution compared to existing platforms, and the system achieved 92.5% accuracy in parsing biomedical queries.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>AI-HOPE-PM lowers technical barriers to integrative cancer research by enabling real-time, user-friendly exploration of clinical, genomic, and SDOH data. It expands on prior work by incorporating equity metrics into precision oncology workflows and offers a scalable tool for supporting disparities-focused translational research. 
Five videos are included as multimedia appendices to demonstrate platform functionality in real-world scenarios.</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>LLM</kwd><kwd>AI agent</kwd><kwd>bioinformatics</kwd><kwd>cancer</kwd><kwd>genomics</kwd><kwd>precision medicine</kwd><kwd>social determinants of health</kwd><kwd>large language model</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Health care is being transformed by comprehensive precision medicine, which personalizes treatment based on individual differences in genetics, environment, and lifestyle [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Alongside this shift, there is growing recognition of the critical role social determinants of health (SDOH) play in shaping disease outcomes and access to care [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. To advance both scientific discovery and health equity, integrating clinical, genomic, and SDOH data is imperative for uncovering disease mechanisms, enhancing treatment effectiveness, and reducing disparities&#x2014;especially among underserved populations. However, several challenges remain: data silos, the need for specialized expertise in multiomics analysis, and the underrepresentation of diverse populations in existing datasets all continue to hinder the equitable realization of precision medicine [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref9">9</xref>].</p><p>The complexity of cancer research workflows demands seamless integration of molecular profiles, clinical metadata, and population-level variables such as race, ethnicity, income, health literacy, and access to care. 
Although web-based tools like cBioPortal [<xref ref-type="bibr" rid="ref10">10</xref>] and UALCAN [<xref ref-type="bibr" rid="ref11">11</xref>] offer structured platforms for querying public cancer datasets such as The Cancer Genome Atlas (TCGA) [<xref ref-type="bibr" rid="ref12">12</xref>], they operate within predefined analytical frameworks and require users to manually conduct multistep filtering, stratification, and statistical interpretation [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. These limitations restrict the flexibility needed to explore hypothesis-driven, context-specific research questions&#x2014;especially those involving SDOH variables critical for addressing health equity.</p><p>Meanwhile, emerging artificial intelligence (AI)&#x2013;based tools like CellAgent [<xref ref-type="bibr" rid="ref17">17</xref>] and AutoBA [<xref ref-type="bibr" rid="ref18">18</xref>] have begun to explore the potential of large language models (LLMs) in bioinformatics workflows [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. However, these systems often focus solely on genomic data and lack the capacity to simultaneously integrate clinical and SDOH variables, thereby limiting their utility in advancing equitable biomedical research.</p><p>Motivated by these gaps, we introduced AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics), a novel LLM-powered conversational agent designed to democratize access to integrative bioinformatics analysis. AI-HOPE-PM allows users&#x2014;regardless of technical background&#x2014;to conduct robust, multidimensional cancer research using natural language queries. 
As illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>, the platform employs natural language processing, retrieval-augmented generation, and Python-based bioinformatics pipelines to translate user queries into reproducible and explainable analyses. This includes case&#x2013;control comparisons, survival modeling, and stratified multiomics analysis&#x2014;all without requiring code or manual data preprocessing.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Overview of AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) workflow. LLM: large language model; SDOH: social determinants of health; TCGA: The Cancer Genome Atlas.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v6i1e76553_fig01.png"/></fig><p>Unlike traditional graphical user interface (GUI) tools, AI-HOPE-PM supports complex, user-defined queries such as &#x201C;Analyze FOLFOX-treated colorectal cancer (CRC) patients with TP53 mutations and varying levels of financial strain.&#x201D; The system autonomously identifies relevant data, filters patient cohorts, integrates clinical treatment and genomic mutation data with socioeconomic context, and generates statistical visualizations, survival curves, and interpretative summaries. By enabling real-time, dynamic exploration of clinical-genomic-SDOH interactions, AI-HOPE-PM simplifies complex workflows and enhances the translational relevance of precision oncology research. This work builds on our previously developed platform, AI-HOPE [<xref ref-type="bibr" rid="ref23">23</xref>], a conversational AI agent designed to support natural language&#x2013;driven integration of clinical and genomic data for precision medicine research. 
While AI-HOPE demonstrated effective local analysis of structured datasets and addressed key bioinformatics needs, it did not incorporate SDOH or population-level variables critical to health equity research. AI-HOPE-PM extends this foundation by integrating SDOH data and supporting population-aware case-control analyses, enabling researchers to interrogate disparities across both molecular and social axes. To evaluate its performance, AI-HOPE-PM was benchmarked against established tools such as cBioPortal and UCSC Xena [<xref ref-type="bibr" rid="ref24">24</xref>]. The benchmarking involved assessing run time efficiency, reproducibility, and usability. In contrast to tools that require step-by-step configuration, AI-HOPE-PM offers streamlined execution of advanced bioinformatics pipelines through LLM-guided user interaction, significantly lowering barriers to data exploration and hypothesis testing.</p><p>By bridging the gap between data complexity and user accessibility, AI-HOPE-PM offers a scalable, inclusive, and equitable AI framework for biomedical discovery. Its ability to integrate clinical, genomic, and SDOH variables addresses the long-standing need for tools that not only generate high-quality insights but also promote diversity and inclusiveness in biomedical research.</p><p>To address the limitations of current bioinformatics tools and advance equity in translational precision medicine, this study introduces AI-HOPE-PM&#x2014;a novel conversational AI platform purpose-built to integrate clinical, genomic, and SDOH data through natural language interaction. The aim of this paper is to describe the development, implementation, and application of AI-HOPE-PM for multidimensional cancer analysis, with a focus on its ability to democratize data exploration, reduce technical barriers, and enable equity-driven hypothesis testing. 
Specifically, we demonstrate how AI-HOPE-PM enables real-time, case-control, and survival analyses that incorporate SDOH variables such as financial strain, food insecurity, health care access, and health literacy, alongside genomic and clinical features. By benchmarking its performance and illustrating its use through case studies in CRC, we highlight the platform&#x2019;s potential to accelerate disparities-focused research, improve biomarker discovery, and support inclusive precision oncology.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Development of AI-HOPE-PM and Data Sources</title><p>AI-HOPE-PM is a conversational AI platform designed to advance translational precision oncology by enabling users to perform integrative bioinformatics analyses through plain-language queries. The system is built on a retrieval-augmented generation framework&#x2014;a method that enhances response accuracy by retrieving relevant information from structured datasets&#x2014;and a fine-tuned biomedical LLM (LLaMA 3). Behind the scenes, the platform uses Python-based scripts to carry out statistical analyses and genomic data processing.</p><p>To enable robust analyses, we used curated multimodal datasets from TCGA, AACR Project GENIE, and cBioPortal. These datasets included harmonized clinical, genomic, and demographic variables. In addition, we generated synthetic SDOH variables using a validated Python script, guided by a literature-informed framework. These SDOH features included health care access, financial strain, food insecurity, social support, and health literacy. All datasets were preprocessed into standardized tab-delimited formats with annotated metadata describing each variable type. 
A full list of variables analyzed&#x2014;including over 200 clinical, genomic, treatment, and SDOH fields&#x2014;is publicly available [<xref ref-type="bibr" rid="ref25">25</xref>], along with the source code, example queries, simulated data, and documentation for reproducing all analyses.</p></sec><sec id="s2-2"><title>Workflow and Natural Language Interface</title><p>Users interact with AI-HOPE-PM via a GUI that accepts plaintext queries. The system interprets these queries using a natural language processing engine to define analytic tasks, including loading a dataset, stratifying cohorts based on genomic or SDOH features, and performing statistical analyses such as survival modeling or odds ratio testing. The resulting structured commands are executed programmatically, streamlining workflows that typically require multiple manual steps or coding expertise.</p></sec><sec id="s2-3"><title>Evaluation and Validation of System Accuracy</title><p>We evaluated AI-HOPE-PM&#x2019;s query interpretation accuracy using 100 natural language prompts that reflected diverse real-world research scenarios in clinical genomics and health disparities. A team of expert reviewers established ground truth interpretations for each query to assess system performance. AI-HOPE-PM achieved an overall accuracy of 92.5%, with near-perfect accuracy (99.1%) for single-variable queries and strong performance (88.4%) for more complex, multivariable prompts. Most errors stemmed from ambiguous phrasing (eg, nonspecific end points), syntactic inconsistencies (eg, nested logic), or misalignment between user language and system variable mappings. To address these issues, AI-HOPE-PM integrates built-in clarification prompts and applies a domain-specific ontology to harmonize terminology and guide users toward more structured input. 
Future development will focus on improving the natural language understanding engine and refining internal mapping algorithms to further enhance accuracy and reproducibility.</p><p>To confirm the analytical fidelity of AI-HOPE-PM, we cross-validated its survival analyses, odds ratio outputs, and cohort stratifications against manually performed analyses previously published by our group using similar datasets and variables. These included studies investigating CRC disparities based on <italic>TP53</italic>, <italic>APC</italic>, and <italic>KRAS</italic> mutation status, treatment modality, and SDOH factors across TCGA and cBioPortal cohorts. The results generated by AI-HOPE-PM were consistent with those from traditional statistical pipelines in terms of hazard ratios, <italic>P</italic> values, and overall survival trends. This validation step supports the platform&#x2019;s accuracy in replicating established findings and reinforces its reliability as a tool for real-time, natural language&#x2013;driven bioinformatics analyses.</p><p>Although we benchmarked AI-HOPE-PM against established platforms such as cBioPortal and UCSC Xena, it is important to acknowledge that these platforms function through traditional GUIs requiring multistep, click-based interactions. This structural difference makes direct comparisons with AI-HOPE-PM&#x2014;an intelligent, conversational AI system&#x2014;challenging. In cBioPortal and Xena, executing multilayered queries or stratified analyses may involve multiple browser windows, dropdown menus, and manual dataset subsetting. In contrast, AI-HOPE-PM enables users to perform similar tasks via a single plain-language prompt, streamlining the process and reducing complexity. While speed remains an advantage for AI-HOPE-PM, we also validated its outputs through comparisons with previously published manual analyses, ensuring consistency and analytical fidelity. 
This intelligent design is intended to reduce the technical barrier for researchers and support scalable, real-time hypothesis generation in precision medicine (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>To ensure robustness in handling natural language variability, AI-HOPE-PM incorporates an interactive clarification mechanism that prompts users for additional input when queries are ambiguous or underspecified. Common edge cases include vague end points (eg, &#x201C;better outcomes&#x201D;), undefined comparison groups, or syntactic inconsistencies (eg, nested logic). In these instances, the system pauses execution and requests clarification through a structured prompt. Furthermore, AI-HOPE-PM uses a curated biomedical ontology to harmonize synonymous terms and align user inputs with internal variable definitions. These strategies support resilient query interpretation and maintain analytical fidelity across diverse and potentially ambiguous user queries (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p></sec><sec id="s2-4"><title>Benchmarking and Comparative Analysis</title><p>To assess usability and speed, we benchmarked AI-HOPE-PM against existing platforms including cBioPortal and UCSC Xena. Biomedical researchers were asked to complete tasks such as dataset loading, filtering based on genomic or SDOH attributes, and initiating analyses. Task durations were measured using stopwatch protocols. AI-HOPE-PM consistently outperformed traditional tools in terms of execution time and ease of use, owing to its automation and intuitive language-driven interface.</p><p>To evaluate the capacity of AI-HOPE-PM to integrate and analyze SDOH alongside clinical and genomic data, we developed a set of simulated SDOH variables. 
These variables&#x2014;including financial strain, food insecurity, social support, health literacy, and insurance access&#x2014;were generated using a Python-based simulation framework informed by published epidemiological distributions and associations relevant to cancer outcomes. The simulation approach was designed to mirror the variability and prevalence observed in real-world populations [<xref ref-type="bibr" rid="ref26">26</xref>], thereby enabling realistic case&#x2013;control stratifications and hypothesis testing. Although these SDOH variables are simulated, they serve as a pragmatic proxy in the absence of widely available, high-quality, individual-level SDOH data within public genomic datasets. For full transparency and reproducibility, the simulation scripts are publicly available [<xref ref-type="bibr" rid="ref25">25</xref>]. Future validation studies using empirical SDOH data from institutional and community-linked datasets are planned to further refine and expand the platform&#x2019;s capabilities.</p></sec><sec id="s2-5"><title>Statistical Analysis and Report Generation</title><p>The platform supports several statistical methods commonly used in cancer genomics, including Kaplan-Meier survival analysis with log-rank testing, Cox proportional hazards regression, and odds ratio calculations for categorical comparisons. Output includes plots such as survival curves and forest plots, accompanied by narrative summaries that describe the findings in context. All outputs are backed by reproducible Python code logs, which are stored internally and can be exported for validation or inclusion in publications [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>].</p></sec><sec id="s2-6"><title>Usability Study and Accessibility</title><p>A formal usability study is underway to evaluate AI-HOPE-PM&#x2019;s effectiveness and accessibility for biomedical researchers. 
Participants are comparing its interface, output quality, and query interpretation capabilities with those of GUI-based tools and other AI-driven platforms. While we did not perform head-to-head comparisons with generative systems such as CellAgent or AutoBA due to differing scopes, AI-HOPE-PM&#x2019;s unique ability to integrate SDOH, clinical, and genomic data positions it as a novel tool for equitable and scalable precision medicine research.</p><p>To preliminarily assess usability, we conducted a small-scale case study involving six non-bioinformatician users, including oncology fellows and public health researchers. Participants were asked to perform common research tasks using AI-HOPE-PM&#x2014;such as loading datasets, selecting cohorts by genomic and social variables, and running survival analyses&#x2014;using only natural language queries. All users completed the tasks successfully, with positive feedback highlighting the intuitive interface, rapid execution, and elimination of the need for coding expertise. These findings provide initial validation of the platform&#x2019;s accessibility to diverse research users.</p></sec><sec id="s2-7"><title>Ethical and Privacy Considerations</title><p>As with any LLM-based system, AI-HOPE-PM is susceptible to biases and potential hallucinations, particularly when interpreting complex or underspecified queries. To mitigate these risks, the system integrates domain-specific ontologies and harmonized variable dictionaries to reduce misinterpretation and support consistent query resolution. Additionally, the platform&#x2019;s built-in clarification prompts serve as a real-time validation mechanism, prompting users to confirm or refine ambiguous instructions. 
While simulated SDOH-genomic interactions provide a useful testing framework, future efforts will emphasize empirical validation using real-world datasets to reduce confounding.</p><p>To address privacy concerns when working with sensitive real-world SDOH variables&#x2014;such as insurance status, ethnicity, and income&#x2014;AI-HOPE-PM is designed to operate as a secure, local AI system deployed within institutional infrastructures. Unlike cloud-based models that may transmit data externally, AI-HOPE-PM processes all data on-site, minimizing the risk of exposure or unauthorized access. This local deployment model supports compliance with data protection regulations, including the Health Insurance Portability and Accountability Act and the General Data Protection Regulation, where applicable. In future iterations, we plan to integrate customizable privacy modules and access controls to align with institutional review board protocols and ensure ethical handling of sensitive population-level health data.</p><p>To mitigate the risk of hallucinations and enhance the reliability of AI-HOPE-PM&#x2019;s outputs, the platform incorporates several ethical safeguards. First, the system leverages domain-specific biomedical ontologies to align user queries with validated clinical and genomic concepts, reducing the likelihood of misinterpretation. Second, AI-HOPE-PM includes built-in prompts that clarify ambiguous user input, supporting more accurate query resolution. We also plan to implement human-in-the-loop verification workflows and bias detection modules, which will allow researchers to review, confirm, and flag generated outputs prior to downstream use. 
These strategies collectively enhance interpretability, accountability, and user trust in the AI-driven analytical process.</p><p>Given the sensitivity of SDOH data, especially variables such as insurance status, ethnicity, or socioeconomic conditions, AI-HOPE-PM is currently designed as a locally deployed system to prevent data exposure through external servers or third-party services. This architecture ensures that no identifiable information is shared beyond institutional firewalls. For future deployments that may involve real-world SDOH data, we plan to incorporate privacy-preserving methods including data deidentification, access controls, and secure computation protocols. All future iterations will comply with established data protection regulations such as the Health Insurance Portability and Accountability Act and the General Data Protection Regulation, ensuring responsible and ethical use of sensitive population-level data.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>By converting natural language instructions into executable bioinformatics workflows, AI-HOPE-PM enabled seamless integration and analysis of clinical, genomic, and SDOH data within CRC datasets. The platform&#x2019;s ability to interpret user queries and automate complex analyses demonstrated its effectiveness in supporting multidimensional, translational cancer research. Through its intuitive conversational interface, the system dynamically classified patient samples into case and control cohorts based on user-defined criteria. These criteria encompassed gene mutation status, treatment regimens, SDOH attributes, and demographic variables, facilitating highly customizable stratifications. 
The system autonomously performed statistical analyses&#x2014;including prevalence estimation, odds ratio tests, and survival modeling&#x2014;and generated comprehensive visualizations and interpretable reports.</p><p>In a prominent use case, AI-HOPE-PM analyzed data from the TCGA COAD dataset to investigate how financial strain affects outcomes among folinic acid, fluorouracil, and oxaliplatin (FOLFOX)&#x2013;treated patients with CRC with <italic>TP53</italic> mutations (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The analysis began by selecting the COAD dataset enriched with SDOH data, allowing users to explore attribute distributions such as financial strain. A bar chart visualization was generated, showing both the count and percentage distribution of financial strain levels across the dataset (<xref ref-type="fig" rid="figure2">Figure 2A</xref>). Based on user-defined filtering criteria&#x2014;patients treated with FOLFOX and harboring <italic>TP53</italic> mutations&#x2014;AI-HOPE-PM created two cohorts: a case cohort of 40 (10.9%) patients reporting mild or no financial issues and a control cohort of 43 (11.7%) patients experiencing moderate to severe financial strain, including those unable to afford care. Pie charts illustrated the proportional distribution of these cohorts within the total 366-sample dataset (<xref ref-type="fig" rid="figure2">Figure 2B</xref>). Once cohorts were defined, the user selected a survival analysis module. AI-HOPE-PM performed a Kaplan-Meier analysis to assess both overall and progression-free survival. The resulting survival plots demonstrated significantly shorter survival in the control group compared to the case group, with <italic>P</italic> values of .05 (overall survival) and .03 (progression-free survival), supported by CIs indicating statistical robustness (<xref ref-type="fig" rid="figure2">Figure 2C</xref>). 
These findings underscore AI-HOPE-PM&#x2019;s ability to integrate clinical, genomic, and SDOH data through natural language&#x2013;guided workflows, enabling rapid identification of clinically meaningful disparities in treatment outcomes and survival. This functionality is further supported by the multimedia demonstration with a similar query [<xref ref-type="bibr" rid="ref27">27</xref>].</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) analysis of folinic acid, fluorouracil, and oxaliplatin&#x2013;treated patients with colorectal cancer with <italic>TP53</italic> mutations and varying levels of financial strain.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v6i1e76553_fig02.png"/></fig><p>Another case study explored the impact of <italic>APC</italic> mutation status among patients with CRC treated with FOLFOX and reporting easy access to health care (<xref ref-type="fig" rid="figure3">Figure 3</xref>). The analysis began by selecting the COAD dataset enriched with SDOH, which enabled exploration of the distribution of health care access variables. A bar chart was generated to visualize both the count and percentage of patients stratified by their reported level of health care access (<xref ref-type="fig" rid="figure3">Figure 3A</xref>). AI-HOPE-PM then applied user-defined filters to create case and control cohorts. The case cohort consisted of 40 (10.9%) patients who had <italic>APC</italic> mutations, reported easy access to health care, and received FOLFOX treatment. The control cohort comprised 12 (3.3%) patients who met the same filtering criteria except they were <italic>APC</italic> wild-type. 
Pie charts illustrated the proportional distribution of these cohorts out of the total 366 patients in the dataset (<xref ref-type="fig" rid="figure3">Figure 3B</xref>). After defining the cohorts, AI-HOPE-PM enabled the user to run a Kaplan-Meier survival analysis, which revealed that patients in the control group (<italic>APC</italic> wild-type) experienced significantly poorer progression-free survival, with a <italic>P</italic> value of .02, as shown in the survival plot (<xref ref-type="fig" rid="figure3">Figure 3C</xref>). This suggests a potential prognostic role of <italic>APC</italic> mutation status under standardized treatment and access conditions. Additionally, the system performed an odds ratio analysis to assess differences in ethnic representation between the cohorts. In this context, Hispanic/Latino identity was used as the comparative variable. The case cohort included 6 (15%) in-context Hispanic/Latino patients and 34 out-of-context patients, while the control cohort included 3 (25%) in-context patients and 9 out-of-context patients. The resulting odds ratio was 0.529 (95% CI 0.11-2.541), indicating a lower&#x2014;but not statistically significant&#x2014;representation of Hispanic/Latino individuals in the case group (<xref ref-type="fig" rid="figure3">Figure 3D</xref>). Together, these results reinforce AI-HOPE-PM&#x2019;s ability to integrate genomic, clinical, and SDOH variables and to highlight the importance of considering ancestral background and access to care when evaluating mutation-driven outcomes in precision oncology. 
As shown through the multimedia demonstration with a similar query [<xref ref-type="bibr" rid="ref28">28</xref>], the platform effectively processes complex, user-defined inputs.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) analysis of patients with colorectal cancer with and without <italic>APC</italic> mutations who have easy access to health care and were treated with folinic acid, fluorouracil, and oxaliplatin.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="bioinform_v6i1e76553_fig03.png"/></fig><p>In a third application, AI-HOPE-PM examined patients with early-onset CRC (age &#x003C;50 y) to evaluate the impact of social support on survival outcomes among those treated with FOLFOX chemotherapy (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The user began by selecting the COAD dataset containing enriched SDOH data. This enabled visualization of patient-level attributes such as age, treatment type, mutation status, and social support level. Histograms and bar plots provided an overview of the distribution of these variables across the cohort (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>A). The case cohort was defined using user-set criteria: patients younger than 50 years, treated with FOLFOX, and classified as having strong or moderate social support, resulting in 17 patients. This subset was visualized using pie charts to reflect the proportion of selected versus total samples (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>B). In parallel, the control cohort was defined with the same criteria except for social support, selecting 14 patients who reported limited or no support. 
A similar pie chart depicted the sample distribution for the control group (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>C). A Kaplan-Meier survival analysis was then conducted to assess overall and progression-free survival differences between the two groups. The survival curves revealed a statistically significant difference in progression-free survival (<italic>P</italic>=.02), with the control group experiencing poorer outcomes. Although the difference in overall survival did not reach statistical significance (<italic>P</italic>=.07), a trend toward worse survival in the control group was observed (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>D). To further characterize these groups, an odds ratio analysis was performed using <italic>TP53</italic> mutation status as the comparative context. The case group had a lower&#x2014;but not statistically significant&#x2014;prevalence of <italic>TP53</italic> mutations, resulting in an odds ratio of 0.706 (95% CI 0.208-2.396; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>E). These findings suggest that lower levels of social support may be linked to poorer progression-free survival and potentially associated with mutational profiles, reinforcing the importance of incorporating psychosocial variables in precision oncology&#x2014;particularly in younger patients with CRC. The capabilities of AI-HOPE-PM are further demonstrated in the multimedia example of a comparable query [<xref ref-type="bibr" rid="ref29">29</xref>].</p><p>In another analysis focused on food insecurity, AI-HOPE-PM investigated survival disparities and treatment access among patients with CRC with <italic>APC</italic> mutations (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). 
The analysis began with the selection of the COAD dataset integrated with SDOH, allowing the user to visualize variables such as food insecurity, treatment type, and <italic>APC</italic> mutation status. Histograms and bar plots summarized the distribution of these attributes across the cohort&#x2014;highlighting proportions of food-insecure patients and chemotherapy exposure stratified by mutation status (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>A). The case cohort was defined as patients reporting no food insecurity and having an <italic>APC</italic> mutation, yielding 245 samples. A pie chart illustrated the representation of this subset within the overall dataset (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>B). The control cohort was established using the same criteria&#x2014;<italic>APC</italic> mutation present&#x2014;but selecting patients with moderate-to-severe food insecurity, resulting in 206 samples. A corresponding pie chart depicted this cohort&#x2019;s proportional distribution (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>C). A Kaplan-Meier progression-free survival analysis was performed, stratifying patients by treatment type, specifically whether they received chemotherapy. Although exact <italic>P</italic> values were not displayed in the figure, the survival curves showed a clear separation, suggesting poorer outcomes in food-insecure patients not receiving chemotherapy (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>D). These visual trends support the finding that food-insecure patients experienced significantly worse progression-free survival, as confirmed by a <italic>P</italic> value of .02 from the associated analysis. To further explore this disparity, an odds ratio analysis was conducted using TREATMENT_TYPE (chemotherapy vs nonchemotherapy) as the comparative context. 
The results revealed that food-insecure patients were less likely to receive chemotherapy, with an odds ratio of 0.356 (95% CI 0.136-1.186), indicating a potential treatment access gap (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>E). This analysis underscores the ability of AI-HOPE-PM to uncover how socioeconomic burden, in conjunction with genomic context, may modulate both treatment delivery and clinical outcomes in CRC. As illustrated through the multimedia demonstration of a similar query [<xref ref-type="bibr" rid="ref30">30</xref>], the platform effectively interprets complex natural language inputs.</p><p>Sex-based disparities were explored in a separate analysis focusing on patients with CRC with limited health literacy who were treated with FOLFOX chemotherapy (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). AI-HOPE-PM utilized the COAD dataset enriched with SDOH and genomic annotations to assess the intersection of insurance status, tumor stage, and <italic>KRAS</italic> mutation status. Bar charts provided an overview of insurance coverage within the dataset, illustrating both the absolute counts and proportional distribution across different insurance categories (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>A). To define the cohorts, the case group was filtered to include insured patients who had <italic>KRAS</italic> mutations, were diagnosed at stage I or II, and received leucovorin-based chemotherapy, yielding 31 samples. This subset was visualized using a pie chart to indicate its proportion out of the total 373 samples (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>B). The control cohort applied identical clinical and molecular filters but included only uninsured patients, resulting in 30 samples (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>C). 
This side-by-side comparison emphasizes how insurance coverage may influence patient stratification and treatment access, even under otherwise uniform clinical conditions. An odds ratio test was performed using <italic>KRAS</italic> mutation status as the defining context to examine mutation prevalence differences between insured and uninsured groups. A stacked bar chart visualized the distribution of in-context (<italic>KRAS</italic>-mutated) versus out-of-context samples in each group. The analysis revealed a modest difference in <italic>KRAS</italic> mutation representation, suggesting that financial access to care could intersect with genomic profiles in ways that warrant deeper investigation (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>D). In a related sex-disparity analysis among patients with limited health literacy, AI-HOPE-PM defined a case cohort of 33 females and a control cohort of 41 males, both treated with FOLFOX. Odds ratio testing using <italic>KRAS</italic> mutation status showed that 30.3% of females and 56.1% of males were <italic>KRAS</italic>-mutated, yielding an odds ratio of 0.503 (95% CI 0.192-1.319; <italic>P</italic>=.24). Although not statistically significant, these findings suggest potential sex-based differences in <italic>KRAS</italic> mutation prevalence under constrained health literacy conditions and highlight the utility of AI-HOPE-PM for uncovering multidimensional disparities in cancer genomics and treatment. This process is illustrated through the multimedia demonstration of a similar query [<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>AI-HOPE-PM also facilitated analyses of nongenomic SDOH influences on CRC outcomes. In one study, the platform was used to explore how insurance status, treatment exposure, and clinical care setting affected survival among patients in the COAD dataset (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). 
The analysis began with the selection of a dataset enriched with SDOH attributes. Bar charts provided a comprehensive overview of insurance type distribution, showing both the absolute number of patients per insurance category and their relative proportions, offering insight into the socioeconomic landscape of the cohort (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>A). Using this context, AI-HOPE-PM defined a case cohort of 41 insured patients with the following characteristics: stage IV CRC, Hispanic/Latino ethnicity, FOLFOX treatment, and care received at a community oncology practice. A pie chart visualized the size of this cohort relative to the dataset (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>B). A control cohort was generated using the same clinical and demographic criteria but restricted to uninsured patients, resulting in 22 samples. The corresponding pie chart highlighted the discrepancy in sample size and access between the insured and uninsured groups (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>C). Following cohort definition, AI-HOPE-PM performed a Kaplan-Meier survival analysis to evaluate overall survival outcomes. The survival plots illustrated a clear separation between the two groups, with uninsured patients showing poorer survival outcomes, despite receiving similar treatments and having similar disease profiles (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>D). While the figure does not specify <italic>P</italic> values or CIs, the divergence in survival curves strongly suggests a detrimental impact of lack of insurance on patient outcomes. These findings underscore the critical role of insurance coverage in modulating survival, even when controlling for genomic, treatment, and staging variables. 
As demonstrated in the multimedia example using a comparable query [<xref ref-type="bibr" rid="ref26">26</xref>], the platform accurately handles complex, user-driven inputs.</p><p>This study complements other AI-HOPE-PM findings by leveraging its capacity to integrate SDOH with clinical and genomic data to uncover disparities in CRC care and outcomes. In one analysis, the system examined the relationship between moderate to severe financial strain and CRC screening adherence, revealing that patients experiencing economic hardship were significantly less likely to participate in screening programs, highlighting a critical barrier to early detection (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). The analysis began with the selection of the SocialFactors_COAD dataset, enabling structured visualization of variables such as <italic>APC</italic> mutation status and health care access levels. Bar plots showed both the frequency of <italic>APC</italic> mutations and the distribution of health care access categories within the full cohort (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>A). A case cohort of 326 patients was created using filters for limited health care access; treatment with agents in fluorouracil, leucovorin, and oxaliplatin; and presence of <italic>APC</italic> mutations (mutation_status=1). A pie chart depicted their proportion relative to the total dataset (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>B). A control cohort of 354 patients was defined using the same criteria except for <italic>APC</italic> wild-type status (mutation_status=0). Their distribution was similarly visualized (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>C). A Kaplan-Meier progression-free survival analysis was then performed, stratified by chemotherapy treatment status and highlighting differences particularly among Hispanic/Latino patients. 
The survival curves revealed a noticeable separation between groups, suggesting a potential survival disadvantage linked to disparities in health care access and genomic background (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>D). Additionally, an odds ratio analysis evaluated treatment disparities based on chemotherapy exposure across the defined cohorts. A bar plot illustrated differences in chemotherapy receipt, reinforcing how limited access to care and mutation status may jointly influence treatment pathways and clinical outcomes (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>E). Other AI-HOPE-PM analyses supported these observations. One study found that patients reporting low social support or isolation had higher rates of treatment discontinuation and worse survival outcomes, consistent with psychosocial oncology literature [<xref ref-type="bibr" rid="ref24">24</xref>]. The platform also uncovered racial and ethnic disparities in progression-free survival, with non-Hispanic White patients demonstrating better outcomes than Black and Hispanic patients, even after adjusting for treatment type and disease stage. Collectively, these results underscore the value of incorporating SDOH variables into precision medicine frameworks, enabling AI-HOPE-PM to reveal systemic inequities that might otherwise be overlooked in genomic-only analyses. The multimedia demonstration of a similar query [<xref ref-type="bibr" rid="ref29">29</xref>] highlights the platform&#x2019;s ability to interpret and execute complex, user-defined instructions.</p><p>AI-HOPE-PM demonstrated high computational efficiency, executing high-dimensional case-control studies involving over 10,000 patient records in under 1 minute. 
In a benchmark comparison, the platform required only 28.02 seconds to open the application, select a database, and filter a single data attribute&#x2014;significantly faster than cBioPortal (58.01 s) and UCSC Xena (46.06 s). By automating the ingestion, filtering, analysis, and reporting stages, AI-HOPE-PM substantially reduced manual burden and turnaround time compared to conventional bioinformatics tools. This performance underscores its value as a scalable AI platform capable of delivering real-time, integrative data analysis to support precision oncology and health equity research.</p><p>In a comparative timing analysis, AI-HOPE-PM significantly outperformed established platforms such as cBioPortal and UCSC Xena in executing basic data query tasks. The standardized task&#x2014;which included launching the application, selecting a dataset, and applying a filter based on a single data attribute&#x2014;was completed in just 28.02 seconds using AI-HOPE-PM. In contrast, the same task required 58.01 seconds on cBioPortal and 46.06 seconds on UCSC Xena. These results underscore the efficiency advantages of AI-HOPE-PM&#x2019;s natural language&#x2013;driven, automated workflow, which streamlines multistep analyses and reduces manual input time compared to traditional GUI-based platforms.</p></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study presents the development and application of AI-HOPE-PM, a conversational AI system designed to integrate clinical, genomic, and SDOH data for precision oncology research. AI-HOPE-PM addresses key limitations in existing bioinformatics tools by enabling users to pose natural language queries that are automatically translated into executable workflows. 
This allows for case-control stratification and hypothesis testing that include both molecular and nonmolecular variables.</p><p>In multiple CRC case studies, AI-HOPE-PM demonstrated the ability to reveal associations between genomic alterations (eg, <italic>TP53</italic> and <italic>APC</italic> mutations), treatment exposures (eg, FOLFOX chemotherapy), and SDOH variables such as financial strain, food insecurity, health care access, and social support. These findings underscore the importance of contextualizing genomic data within broader socioeconomic and behavioral frameworks to better understand cancer disparities and inform population-relevant strategies.</p></sec><sec id="s4-2"><title>Comparison to Prior Work</title><p>Traditional tools such as cBioPortal and UCSC Xena have facilitated broad access to public cancer genomic datasets, yet they require manual, multistep filtering and operate within fixed analytical frameworks. These platforms typically lack support for SDOH integration and require a certain level of technical expertise, limiting their accessibility for noncomputational researchers and clinicians. More recent tools like CellAgent [<xref ref-type="bibr" rid="ref17">17</xref>] and AutoBA [<xref ref-type="bibr" rid="ref18">18</xref>] have begun to explore the use of LLMs in biomedical contexts, but their scope is generally limited to genomic analysis and does not extend to the integration of clinical or social variables essential for advancing health equity.</p><p>Our group&#x2019;s prior work introduced AI-HOPE, a closed-system, LLM-driven conversational agent designed to enable integrative clinical and genomic data analyses through natural language interactions [<xref ref-type="bibr" rid="ref23">23</xref>]. AI-HOPE allows users to perform association studies, prevalence assessments, and survival analyses on locally stored datasets while maintaining data security and interpretability. 
It demonstrated its capabilities by identifying well-documented associations in TCGA CRC datasets, such as the enrichment of <italic>TP53</italic> mutations in late-stage CRC and the association of <italic>KRAS</italic> mutations with poor progression-free survival in FOLFOX-treated patients. While AI-HOPE addressed the integration of clinical and genomic data, it was not explicitly designed to handle population-level equity metrics or SDOH variables.</p><p>AI-HOPE-PM builds on and significantly extends this foundation by incorporating SDOH dimensions&#x2014;such as financial strain, health care access, food insecurity, and health literacy&#x2014;into its analytical framework. This addition allows researchers to study cancer outcomes in a more holistic context, bridging molecular findings with real-world social environments. Furthermore, AI-HOPE-PM expands the scope of natural language query handling to accommodate multimodal stratification involving genomic, clinical, and social parameters, which is essential for addressing health disparities. By doing so, it complements AI-HOPE&#x2019;s functionality while introducing new capabilities that are critical for equity-focused translational research.</p></sec><sec id="s4-3"><title>Strengths and Limitations</title><p>A key strength of AI-HOPE-PM is its ability to perform integrative, user-defined analyses through natural language queries without requiring programming expertise. This significantly reduces technical barriers for clinician-scientists and public health researchers. Importantly, the platform enables the inclusion of SDOH variables&#x2014;such as financial strain, health care access, and social support&#x2014;that are often absent from traditional bioinformatics workflows. 
Its modular architecture supports rapid cohort definition, survival modeling, and odds ratio testing across large, harmonized datasets, allowing for real-time hypothesis generation and exploratory analysis.</p><p>However, several limitations should be acknowledged. First, while this study used harmonized and simulated SDOH variables to demonstrate the platform&#x2019;s functionality, the availability and quality of real-world, longitudinal SDOH data remain limited in many health care systems. This may affect the generalizability of findings and the real-world applicability of the platform. Future efforts will require integration with validated, longitudinal SDOH datasets&#x2014;potentially through partnerships with clinical institutions and population health data repositories. Second, AI-HOPE-PM&#x2019;s current design is optimized for structured, publicly available datasets such as TCGA, cBioPortal, and AACR GENIE. As such, its adaptability to unstructured clinical data or eHealth records is limited. While this design choice enhances reproducibility and alignment with standardized biomedical ontologies, future work should explore interoperability with clinical informatics platforms and natural language extraction from eHealth records to expand usability in health care settings. Third, this study focused exclusively on CRC datasets. As a result, findings and workflows may not be immediately generalizable to other cancer types without retraining or additional customization of the AI system. Although the architecture is designed to be adaptable, validation on other tumor types and disease areas will be essential for broader adoption. Fourth, while benchmarking analyses demonstrated strong performance compared to tools like cBioPortal and UCSC Xena, formal usability testing and prospective validation in real-world clinical and research environments were not conducted. 
These are planned as part of future development phases and will be critical for refining the user interface, evaluating human-AI collaboration, and assessing clinical impact. By acknowledging and addressing these limitations, future iterations of AI-HOPE-PM can be improved to better support equitable, scalable, and clinically relevant precision medicine research.</p><p>A notable limitation of the current study is the use of simulated SDOH variables rather than real-world data. While these simulated features were generated to reflect established patterns from peer-reviewed literature and public health datasets, they cannot fully replicate the variability, context-dependence, or missingness typical of empirical SDOH data collected in clinical or community settings. This limitation may impact the external validity of some findings and restrict generalizability. To address this, we are actively pursuing collaborations with health systems and community-based data partners to incorporate validated, longitudinal SDOH datasets into future deployments of AI-HOPE-PM. This planned integration will enable more accurate assessment of equity-relevant outcomes and enhance the platform&#x2019;s application in real-world clinical research.</p><p>While AI-HOPE-PM achieved a high query interpretation accuracy of 92.5% during internal evaluation, several error modes were identified that merit consideration. The most frequent issues involved ambiguity in natural language input&#x2014;particularly when users provided imprecise criteria for cohort selection or omitted critical parameters. Additionally, complex nested queries and nonstandard phrasing occasionally led to misinterpretation or partial execution. In a minority of cases, errors stemmed from misalignment between user terminology and the platform&#x2019;s internal ontology, particularly for less common clinical or SDOH variables. 
To address these challenges, AI-HOPE-PM integrates clarification prompts that guide users toward more precise query formulation and supports synonym recognition for common variable names. Ongoing improvements include refining the language model&#x2019;s domain specificity and expanding the internal ontology to better accommodate diverse user inputs. These enhancements are essential for improving reproducibility and user experience in real-world settings.</p><p>A key limitation of this study is the use of simulated SDOH variables rather than real-world data. While simulation allowed us to prototype and evaluate the functionality of AI-HOPE-PM under controlled conditions, it does not fully capture the complexity, heterogeneity, or potential missingness often present in real clinical and social datasets. To address this limitation, we have developed and released an open-source Python script [<xref ref-type="bibr" rid="ref25">25</xref>] that transparently outlines our simulation methodology. Additionally, we are actively working on the integration of real-world SDOH data through ongoing projects [<xref ref-type="bibr" rid="ref32">32</xref>], which are sequencing and characterizing tumors from 500 Hispanic/Latino patients in the Los Angeles catchment area. These datasets will allow us to test AI-HOPE-PM&#x2019;s performance in real clinical environments and refine its capacity to analyze authentic, population-specific SDOH variables in future iterations.</p><p>To address this limitation, we acknowledge that the current evaluation of AI-HOPE-PM using 100 natural language queries&#x2014;while carefully curated by physician-scientists, public health researchers, biostatisticians, and bioinformaticians to reflect real-world clinical and translational scenarios&#x2014;represents an early validation phase. These queries were intentionally designed to ensure clinical accuracy, relevance, and internal consistency. 
However, we recognize the importance of expanding evaluation to include a broader and more diverse group of end users across different levels of expertise. Future iterations of AI-HOPE-PM will incorporate structured feedback from clinicians, public health researchers, and community health stakeholders. This participatory approach will help identify diverse interaction patterns, reduce potential biases, and enhance the platform&#x2019;s interpretive capacity over time.</p></sec><sec id="s4-4"><title>Future Directions</title><p>Future development of AI-HOPE-PM will focus on several enhancements. First, expanding support for additional omics layers [<xref ref-type="bibr" rid="ref32">32</xref>], including spatial biology [<xref ref-type="bibr" rid="ref33">33</xref>] and single-cell analyses [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>], could improve the platform&#x2019;s applicability to emerging areas in systems oncology. Integration with federated learning frameworks may also enable secure, institution-specific model updates without compromising patient privacy. Moreover, enhancing the system&#x2019;s ability to handle longitudinal data, including treatment timelines and SDOH trajectories, will be critical for supporting causal inference and policy-relevant research [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>Future iterations of AI-HOPE-PM will prioritize the integration of more inclusive and representative genomic datasets to enhance the platform&#x2019;s utility across diverse patient populations. While the current analyses rely on publicly available sources such as TCGA and cBioPortal&#x2014;which are known to underrepresent racial and ethnic minorities&#x2014;there have been encouraging advances in improving dataset diversity, particularly in CRC cohorts submitted by major US cancer centers. 
Notably, several ongoing initiatives aim to sequence and characterize tumors from historically underrepresented populations, including Hispanic/Latino patients with CRC [<xref ref-type="bibr" rid="ref32">32</xref>]. These datasets, once publicly released, will be incorporated into AI-HOPE-PM to improve its generalizability and relevance in addressing cancer health disparities. This aligns with our overarching mission to develop equity-focused precision oncology tools that are responsive to the needs of all communities.</p><p>In this study, benchmarking primarily focused on task completion time&#x2014;measuring the duration to execute standard bioinformatics queries across AI-HOPE-PM, cBioPortal, and UCSC Xena. While AI-HOPE-PM demonstrated superior efficiency due to its natural language automation, we acknowledge that this assessment does not encompass analytical output comparison. Future benchmarking studies will evaluate not only speed but also reproducibility and concordance of statistical results, including survival curves, odds ratios, and subgroup analyses. This expanded evaluation will ensure that AI-HOPE-PM delivers results comparable in accuracy and robustness to established platforms, further supporting its utility for translational cancer research.</p><p>A preliminary usability assessment was conducted during an internal pilot deployment involving five clinician-scientists and three public health researchers. Participants were asked to complete common clinical-genomic queries using AI-HOPE-PM and provide structured feedback on system usability, interpretability of outputs, and ease of query formulation. Feedback indicated that users found the natural language interface intuitive and appreciated the automation of statistical analyses without coding. Suggestions for improvement included refining terminology prompts and expanding visualization customization. 
These insights have been incorporated into the current version of AI-HOPE-PM, and a formal usability study with a larger and more diverse cohort is currently underway to systematically evaluate accessibility, performance, and user satisfaction.</p><p>To enhance accessibility and promote broader adoption, particularly in resource-constrained environments, we are actively exploring deployment strategies that reduce local infrastructure requirements. Although the current AI-HOPE-PM system benefits from graphics processing unit acceleration for large-scale genomic analyses, the core functionalities&#x2014;including query interpretation, basic statistical modeling, and report generation&#x2014;can be executed on standard central processing unit-based systems. Additionally, we are developing a lightweight web-hosted version of the platform with backend support on scalable cloud infrastructure, enabling institutions with limited computational resources to access AI-HOPE-PM through a browser without the need for specialized hardware. Future iterations will also offer modular processing options that allow users to select compute-intensive features based on available resources.</p><p>User-centered evaluations&#x2014;including usability studies with diverse researchers and clinicians&#x2014;are planned to better understand the platform&#x2019;s accessibility and impact in real-world settings. Additionally, collaborations with community-based research initiatives may help validate AI-HOPE-PM&#x2019;s role in addressing health disparities and improving equity in precision medicine.</p><p>AI-HOPE-PM was developed with scalability and accessibility in mind, including potential deployment in resource-constrained settings. The system can be installed and executed locally, eliminating the need for high-bandwidth internet or continuous cloud access. 
While graphics processing unit acceleration can enhance performance for large-scale queries, the platform&#x2019;s modular backend supports central processing unit&#x2013;only configurations for smaller datasets and standard analyses. Ongoing optimization efforts aim to further reduce computational overhead through lightweight LLM variants and model compression techniques. These features support broader adoption across diverse institutional environments, including low-resource clinical and research settings.</p><p>A key consideration for the broader adoption of AI-HOPE-PM is the potential for language bias and variability in natural language queries. While the current version of the platform is optimized for English-language input, this may limit accessibility for nonnative English speakers or introduce semantic variability that could affect interpretation. To mitigate this, AI-HOPE-PM employs a domain-specific ontology with synonym recognition and structured clarification prompts that guide users toward standardized, interpretable input. These features reduce the likelihood of misinterpretation and increase the reliability of query processing. Nonetheless, we recognize the importance of supporting diverse linguistic backgrounds in biomedical research. 
Future iterations of the platform will integrate multilingual capabilities and undergo structured usability evaluations in non&#x2013;English-speaking populations to ensure equitable utility and minimize language-related inequities in research engagement.</p></sec></sec></body><back><notes><sec><title>Data Availability</title><p>The AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) platform, along with demonstration datasets, can be accessed on GitHub [<xref ref-type="bibr" rid="ref25">25</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AI-HOPE-PM</term><def><p>Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics</p></def></def-item><def-item><term id="abb3">CRC</term><def><p>colorectal cancer</p></def></def-item><def-item><term id="abb4">FOLFOX</term><def><p>folinic acid, fluorouracil, and oxaliplatin</p></def></def-item><def-item><term id="abb5">GUI</term><def><p>graphical user interface</p></def></def-item><def-item><term id="abb6">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb7">SDOH</term><def><p>social determinants of health</p></def></def-item><def-item><term id="abb8">TCGA</term><def><p>The Cancer Genome Atlas</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>FS</given-names> </name><name name-style="western"><surname>Varmus</surname><given-names>H</given-names> </name></person-group><article-title>A new initiative on precision medicine</article-title><source>N Engl J 
Med</source><year>2015</year><month>02</month><day>26</day><volume>372</volume><issue>9</issue><fpage>793</fpage><lpage>795</lpage><pub-id pub-id-type="doi">10.1056/NEJMp1500523</pub-id><pub-id pub-id-type="medline">25635347</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Bono</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Ashworth</surname><given-names>A</given-names> </name></person-group><article-title>Translating cancer research into targeted therapeutics</article-title><source>Nature</source><year>2010</year><month>09</month><day>30</day><volume>467</volume><issue>7315</issue><fpage>543</fpage><lpage>549</lpage><pub-id pub-id-type="doi">10.1038/nature09339</pub-id><pub-id pub-id-type="medline">20882008</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Braveman</surname><given-names>P</given-names> </name><name name-style="western"><surname>Gottlieb</surname><given-names>L</given-names> </name></person-group><article-title>The social determinants of health: it&#x2019;s time to consider the causes of the causes</article-title><source>Public Health Rep</source><year>2014</year><volume>129</volume><issue>Suppl 2</issue><fpage>19</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1177/00333549141291S206</pub-id><pub-id pub-id-type="medline">24385661</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pinheiro</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Reshetnyak</surname><given-names>E</given-names> </name><name name-style="western"><surname>Akinyemiju</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Phillips</surname><given-names>E</given-names> </name><name name-style="western"><surname>Safford</surname><given-names>MM</given-names> </name></person-group><article-title>Social determinants of health and cancer mortality in the Reasons for Geographic and Racial Differences in Stroke (REGARDS) cohort study</article-title><source>Cancer</source><year>2022</year><month>01</month><day>1</day><volume>128</volume><issue>1</issue><fpage>122</fpage><lpage>130</lpage><pub-id pub-id-type="doi">10.1002/cncr.33894</pub-id><pub-id pub-id-type="medline">34478162</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adler</surname><given-names>NE</given-names> </name><name name-style="western"><surname>Stewart</surname><given-names>J</given-names> </name></person-group><article-title>Preface to the biology of disadvantage: socioeconomic status and health</article-title><source>Ann N Y Acad Sci</source><year>2010</year><month>02</month><volume>1186</volume><issue>2</issue><fpage>1</fpage><lpage>4</lpage><pub-id pub-id-type="doi">10.1111/j.1749-6632.2009.05385.x</pub-id><pub-id pub-id-type="medline">20201864</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Popejoy</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Fullerton</surname><given-names>SM</given-names> </name></person-group><article-title>Genomics is failing on diversity</article-title><source>Nature</source><year>2016</year><month>10</month><day>13</day><volume>538</volume><issue>7624</issue><fpage>161</fpage><lpage>164</lpage><pub-id pub-id-type="doi">10.1038/538161a</pub-id><pub-id pub-id-type="medline">27734877</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Manrai</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Funke</surname><given-names>BH</given-names> </name><name name-style="western"><surname>Rehm</surname><given-names>HL</given-names> </name><etal/></person-group><article-title>Genetic misdiagnoses and the potential for health disparities</article-title><source>N Engl J Med</source><year>2016</year><month>08</month><day>18</day><volume>375</volume><issue>7</issue><fpage>655</fpage><lpage>665</lpage><pub-id pub-id-type="doi">10.1056/NEJMsa1507092</pub-id><pub-id pub-id-type="medline">27532831</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hamid</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>P</given-names> </name><name name-style="western"><surname>Roslin</surname><given-names>NM</given-names> </name><name name-style="western"><surname>Ling</surname><given-names>V</given-names> </name><name name-style="western"><surname>Greenwood</surname><given-names>CMT</given-names> </name><name name-style="western"><surname>Beyene</surname><given-names>J</given-names> </name></person-group><article-title>Data integration in genetics and genomics: methods and challenges</article-title><source>Hum Genomics Proteomics</source><year>2009</year><month>01</month><day>12</day><volume>2009</volume><fpage>869093</fpage><pub-id pub-id-type="doi">10.4061/2009/869093</pub-id><pub-id pub-id-type="medline">20948564</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reif</surname><given-names>DM</given-names> </name><name name-style="western"><surname>White</surname><given-names>BC</given-names> 
</name><name name-style="western"><surname>Moore</surname><given-names>JH</given-names> </name></person-group><article-title>Integrated analysis of genetic, genomic and proteomic data</article-title><source>Expert Rev Proteomics</source><year>2004</year><month>06</month><volume>1</volume><issue>1</issue><fpage>67</fpage><lpage>75</lpage><pub-id pub-id-type="doi">10.1586/14789450.1.1.67</pub-id><pub-id pub-id-type="medline">15966800</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cerami</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dogrusoz</surname><given-names>U</given-names> </name><etal/></person-group><article-title>The cBio cancer genomics portal: an open platform for exploring multidimensional cancer genomics data</article-title><source>Cancer Discov</source><year>2012</year><month>05</month><volume>2</volume><issue>5</issue><fpage>401</fpage><lpage>404</lpage><pub-id pub-id-type="doi">10.1158/2159-8290.CD-12-0095</pub-id><pub-id pub-id-type="medline">22588877</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chandrashekar</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Bashel</surname><given-names>B</given-names> </name><name name-style="western"><surname>Balasubramanya</surname><given-names>SAH</given-names> </name><etal/></person-group><article-title>UALCAN: a portal for facilitating tumor subgroup gene expression and survival analyses</article-title><source>Neoplasia</source><year>2017</year><month>08</month><volume>19</volume><issue>8</issue><fpage>649</fpage><lpage>658</lpage><pub-id pub-id-type="doi">10.1016/j.neo.2017.05.002</pub-id><pub-id 
pub-id-type="medline">28732212</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tomczak</surname><given-names>K</given-names> </name><name name-style="western"><surname>Czerwi&#x0144;ska</surname><given-names>P</given-names> </name><name name-style="western"><surname>Wiznerowicz</surname><given-names>M</given-names> </name></person-group><article-title>The Cancer Genome Atlas (TCGA): an immeasurable source of knowledge</article-title><source>Contemp Oncol (Pozn)</source><year>2015</year><volume>19</volume><issue>1A</issue><fpage>A68</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.5114/wo.2014.47136</pub-id><pub-id pub-id-type="medline">25691825</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Monge</surname><given-names>C</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Carranza</surname><given-names>FG</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>Molecular heterogeneity in early-onset colorectal cancer: pathway-specific insights in high-risk populations</article-title><source>Cancers (Basel)</source><year>2025</year><month>04</month><day>15</day><volume>17</volume><issue>8</issue><fpage>1325</fpage><pub-id pub-id-type="doi">10.3390/cancers17081325</pub-id><pub-id pub-id-type="medline">40282501</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Monge</surname><given-names>C</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> 
</name><name name-style="western"><surname>Manjarrez</surname><given-names>S</given-names> </name><name name-style="western"><surname>Carranza</surname><given-names>FG</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>Detecting PI3K and TP53 pathway disruptions in early-onset colorectal cancer among Hispanic/Latino patients</article-title><source>Cancer Med</source><year>2025</year><month>04</month><volume>14</volume><issue>7</issue><fpage>e70791</fpage><pub-id pub-id-type="doi">10.1002/cam4.70791</pub-id><pub-id pub-id-type="medline">40165548</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Monge</surname><given-names>C</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Carranza</surname><given-names>FG</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>Ethnicity-specific molecular alterations in MAPK and JAK/STAT pathways in early-onset colorectal cancer</article-title><source>Cancers (Basel)</source><year>2025</year><month>03</month><day>25</day><volume>17</volume><issue>7</issue><fpage>1093</fpage><pub-id pub-id-type="doi">10.3390/cancers17071093</pub-id><pub-id pub-id-type="medline">40227607</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>L</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kong</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>Zhan</surname><given-names>Y</given-names> </name></person-group><article-title>A gene signature related to programmed cell death to predict immunotherapy response and prognosis in colon adenocarcinoma</article-title><source>PeerJ</source><year>2025</year><volume>13</volume><fpage>e18895</fpage><pub-id pub-id-type="doi">10.7717/peerj.18895</pub-id><pub-id pub-id-type="medline">39950044</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Xiao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zheng</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>CellAgent: an LLM-driven multi-agent framework for automated single-cell data analysis</article-title><source>bioRxiv</source><comment>Preprint posted online on May 17, 2024</comment><pub-id pub-id-type="doi">10.1101/2024.05.13.593861</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>B</given-names> </name><name name-style="western"><surname>Li</surname><given-names>G</given-names> </name><etal/></person-group><article-title>An AI agent for fully automated multi&#x2010;omic analyses</article-title><source>Adv Sci (Weinh)</source><year>2024</year><month>11</month><volume>11</volume><issue>44</issue><pub-id pub-id-type="doi">10.1002/advs.202407094</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mandelbaum</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Levy-Erez</surname><given-names>D</given-names> </name><name name-style="western"><surname>Soffer</surname><given-names>S</given-names> </name><name name-style="western"><surname>Klang</surname><given-names>E</given-names> </name><name name-style="western"><surname>Levy-Mendelovich</surname><given-names>S</given-names> </name></person-group><article-title>Artificial intelligence: large language models in pediatrics. What do we know so far?</article-title><source>Isr Med Assoc J</source><year>2025</year><month>03</month><volume>27</volume><issue>3</issue><fpage>183</fpage><lpage>188</lpage><pub-id pub-id-type="medline">40134173</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Carl&#x00E0;</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Gambini</surname><given-names>G</given-names> </name><name name-style="western"><surname>Giannuzzi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Boselli</surname><given-names>F</given-names> </name><name name-style="western"><surname>De Luca</surname><given-names>L</given-names> </name><name name-style="western"><surname>Rizzo</surname><given-names>S</given-names> </name></person-group><article-title>Testing the reliability of ChatGPT assistance for surgical choices in challenging glaucoma cases</article-title><source>J Pers Med</source><year>2025</year><month>02</month><day>28</day><volume>15</volume><issue>3</issue><fpage>97</fpage><pub-id pub-id-type="doi">10.3390/jpm15030097</pub-id><pub-id pub-id-type="medline">40137413</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kopka</surname><given-names>M</given-names> </name><name name-style="western"><surname>von 
Kalckreuth</surname><given-names>N</given-names> </name><name name-style="western"><surname>Feufel</surname><given-names>MA</given-names> </name></person-group><article-title>Accuracy of online symptom assessment applications, large language models, and laypeople for self-triage decisions</article-title><source>NPJ Digit Med</source><year>2025</year><month>03</month><day>25</day><volume>8</volume><issue>1</issue><fpage>178</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01566-6</pub-id><pub-id pub-id-type="medline">40133390</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ishida</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sato</surname><given-names>T</given-names> </name><name name-style="western"><surname>Honma</surname><given-names>T</given-names> </name><name name-style="western"><surname>Terayama</surname><given-names>K</given-names> </name></person-group><article-title>Large language models open new way of AI-assisted molecule design for chemists</article-title><source>J Cheminform</source><year>2025</year><month>03</month><day>24</day><volume>17</volume><issue>1</issue><fpage>36</fpage><pub-id pub-id-type="doi">10.1186/s13321-025-00984-8</pub-id><pub-id pub-id-type="medline">40128788</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>AI-HOPE: an AI-driven conversational agent for enhanced clinical and genomic data integration in precision medicine 
research</article-title><source>Bioinformatics</source><year>2025</year><month>07</month><day>1</day><volume>41</volume><issue>7</issue><fpage>btaf359</fpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btaf359</pub-id><pub-id pub-id-type="medline">40577785</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goldman</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Craft</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hastie</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Visualizing and interpreting cancer genomics data via the Xena platform</article-title><source>Nat Biotechnol</source><year>2020</year><month>06</month><volume>38</volume><issue>6</issue><fpage>675</fpage><lpage>678</lpage><pub-id pub-id-type="doi">10.1038/s41587-020-0546-8</pub-id><pub-id pub-id-type="medline">32444850</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>Velazquez Villarreal Lab</collab></person-group><article-title>AI-HOPE-PM</article-title><source>GitHub</source><access-date>2024-06-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/Velazquez-Villarreal-Lab/AI-HOPE-PM">https://github.com/Velazquez-Villarreal-Lab/AI-HOPE-PM</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hutchings</surname><given-names>H</given-names> </name><name name-style="western"><surname>Behinaein</surname><given-names>P</given-names> </name><name name-style="western"><surname>Enofe</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Association of social determinants with patient-reported 
outcomes in patients with cancer</article-title><source>Cancers (Basel)</source><year>2024</year><month>02</month><day>29</day><volume>16</volume><issue>5</issue><fpage>1015</fpage><pub-id pub-id-type="doi">10.3390/cancers16051015</pub-id><pub-id pub-id-type="medline">38473374</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>YouTube video: Artificial intelligence agent &#x2013; the Velazquez-Villarreal Lab at City of HOPE - AI-HOPE-PM example 4</article-title><source>Doctor Enrique Velazquez YouTube page</source><year>2025</year><month>06</month><day>20</day><access-date>2025-06-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://youtu.be/4ZuD7-L2RuM">https://youtu.be/4ZuD7-L2RuM</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>YouTube video: Artificial intelligence agent &#x2013; the Velazquez-Villarreal Lab at City of HOPE - AI-HOPE-PM example 1</article-title><source>Doctor Enrique Velazquez YouTube page</source><year>2025</year><month>06</month><day>20</day><access-date>2025-06-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://youtu.be/senJauIc47M">https://youtu.be/senJauIc47M</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>YouTube video: Artificial intelligence agent &#x2013; the Velazquez-Villarreal Lab at City of Hope - AI-HOPE-PM example 3</article-title><source>Doctor Enrique Velazquez YouTube page</source><year>2025</year><month>06</month><day>20</day><access-date>2025-06-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://youtu.be/wPk6Vh6n5xg">https://youtu.be/wPk6Vh6n5xg</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><article-title>YouTube video: Artificial intelligence agent &#x2013; the Velazquez-Villarreal 
Lab at City of HOPE - AI-HOPE-PM example 2</article-title><source>Doctor Enrique Velazquez YouTube page</source><year>2025</year><month>06</month><day>20</day><access-date>2025-06-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://youtu.be/b6us-TKWgDc">https://youtu.be/b6us-TKWgDc</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>YouTube video: Artificial intelligence agent &#x2013; the Velazquez-Villarreal Lab at City of HOPE - AI-HOPE-PM example 5</article-title><source>Doctor Enrique Velazquez YouTube page</source><year>2025</year><month>06</month><day>20</day><access-date>2025-06-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://youtu.be/XNi4bkWE_AI">https://youtu.be/XNi4bkWE_AI</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Carranza</surname><given-names>F</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Integrative multi-omics profiling of colorectal cancer from a Hispanic/Latino cohort of patients</article-title><source>medRxiv</source><comment>Preprint posted online on Nov 15, 2024</comment><pub-id pub-id-type="doi">10.1101/2024.11.03.24316599</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Carranza</surname><given-names>FG</given-names> </name><name name-style="western"><surname>Diaz</surname><given-names>FC</given-names> </name><name name-style="western"><surname>Ninova</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>Current state and future prospects of spatial biology in colorectal cancer</article-title><source>Front Oncol</source><year>2024</year><volume>14</volume><fpage>1513821</fpage><pub-id pub-id-type="doi">10.3389/fonc.2024.1513821</pub-id><pub-id pub-id-type="medline">39711954</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>EI</given-names> </name><name name-style="western"><surname>Maheshwari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sorenson</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Single-cell sequencing of genomic DNA resolves sub-clonal heterogeneity in a melanoma cell line</article-title><source>Commun Biol</source><year>2020</year><month>06</month><day>25</day><volume>3</volume><issue>1</issue><fpage>318</fpage><pub-id pub-id-type="doi">10.1038/s42003-020-1044-8</pub-id><pub-id pub-id-type="medline">32587328</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amezquita</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Lun</surname><given-names>ATL</given-names> </name><name name-style="western"><surname>Becht</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Orchestrating single-cell analysis with bioconductor</article-title><source>Nat Methods</source><year>2020</year><volume>17</volume><issue>2</issue><fpage>137</fpage><lpage>145</lpage><pub-id pub-id-type="doi">10.1038/s41592-019-0700-8</pub-id><pub-id pub-id-type="medline">31792435</pub-id></nlm-citation></ref><ref 
id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heumos</surname><given-names>L</given-names> </name><name name-style="western"><surname>Schaar</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Lance</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Best practices for single-cell analysis across modalities</article-title><source>Nat Rev Genet</source><year>2023</year><month>08</month><volume>24</volume><issue>8</issue><fpage>550</fpage><lpage>572</lpage><pub-id pub-id-type="doi">10.1038/s41576-023-00586-w</pub-id><pub-id pub-id-type="medline">37002403</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>Precision oncology through dialogue: AI-HOPE-RTK-RAS integrates clinical and genomic insights into RTK-RAS alterations in colorectal cancer</article-title><source>Biomedicines</source><year>2025</year><month>07</month><day>28</day><volume>13</volume><issue>8</issue><fpage>1835</fpage><pub-id pub-id-type="doi">10.3390/biomedicines13081835</pub-id><pub-id pub-id-type="medline">40868090</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> 
</name></person-group><article-title>Decoding the JAK-STAT axis in colorectal cancer with AI-HOPE-JAK-STAT: a conversational artificial intelligence approach to clinical-genomic integration</article-title><source>Cancers (Basel)</source><year>2025</year><month>07</month><day>17</day><volume>17</volume><issue>14</issue><fpage>2376</fpage><pub-id pub-id-type="doi">10.3390/cancers17142376</pub-id><pub-id pub-id-type="medline">40723258</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>AI-HOPE-TP53: a conversational artificial intelligence agent for pathway-centric analysis of TP53-driven molecular alterations in early-onset colorectal cancer</article-title><source>Cancers (Basel)</source><year>2025</year><month>08</month><day>31</day><volume>17</volume><issue>17</issue><fpage>2865</fpage><pub-id pub-id-type="doi">10.3390/cancers17172865</pub-id><pub-id pub-id-type="medline">40940961</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>AI-HOPE-TGFbeta: a conversational AI agent for integrative clinical and genomic analysis of TGF-&#x03B2; pathway alterations in colorectal cancer to advance precision 
medicine</article-title><source>AI</source><year>2025</year><volume>6</volume><issue>7</issue><fpage>137</fpage><pub-id pub-id-type="doi">10.3390/ai6070137</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>From mutation to prognosis: AI-HOPE-PI3K enables artificial intelligence agent-driven integration of PI3K pathway data in colorectal cancer precision medicine</article-title><source>Int J Mol Sci</source><year>2025</year><month>07</month><day>5</day><volume>26</volume><issue>13</issue><fpage>6487</fpage><pub-id pub-id-type="doi">10.3390/ijms26136487</pub-id><pub-id pub-id-type="medline">40650262</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Waldrup</surname><given-names>B</given-names> </name><name name-style="western"><surname>Velazquez-Villarreal</surname><given-names>E</given-names> </name></person-group><article-title>Conversational AI agent for precision oncology: AI-HOPE-WNT integrates clinical and genomic data to investigate WNT pathway dysregulation in colorectal cancer</article-title><source>Front Artif Intell</source><year>2025</year><volume>8</volume><fpage>1624797</fpage><pub-id pub-id-type="doi">10.3389/frai.2025.1624797</pub-id><pub-id pub-id-type="medline">40860720</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>AI-HOPE-PM (Artificial Intelligence Agent for 
High-Optimization and Precision Medicine in Population Metrics) analysis of patients with early-onset colorectal cancer treated with folinic acid, fluorouracil, and oxaliplatin and varying levels of social support.</p><media xlink:href="bioinform_v6i1e76553_app1.docx" xlink:title="DOCX File, 501 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) analysis of patients with colorectal cancer with and without chemotherapy treatment, food security, and <italic>APC</italic> mutations.</p><media xlink:href="bioinform_v6i1e76553_app2.docx" xlink:title="DOCX File, 360 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) analysis of patients with colorectal cancer with <italic>KRAS</italic> mutations in the context of insurance coverage and tumor stage.</p><media xlink:href="bioinform_v6i1e76553_app3.docx" xlink:title="DOCX File, 328 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) analysis of survival outcomes in patients with colorectal cancer with different insurance and treatment profiles.</p><media xlink:href="bioinform_v6i1e76553_app4.docx" xlink:title="DOCX File, 352 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>AI-HOPE-PM (Artificial Intelligence Agent for High-Optimization and Precision Medicine in Population Metrics) stratification of patients with colorectal cancer by health care access, <italic>APC</italic> mutation, and ethnicity for survival and treatment disparity analysis.</p><media xlink:href="bioinform_v6i1e76553_app5.docx" xlink:title="DOCX File, 385 
KB"/></supplementary-material></app-group></back></article>