<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JBB</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Bioinform Biotech</journal-id>
      <journal-title>JMIR Bioinformatics and Biotechnology</journal-title>
      <issn pub-type="epub">2563-3570</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v3i1e37701</article-id>
      <article-id pub-id-type="pmid"/>
      <article-id pub-id-type="doi">10.2196/37701</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Diagnosis of a Single-Nucleotide Variant in Whole-Exome Sequencing Data for Patients With Inherited Diseases: Machine Learning Study Using Artificial Intelligence Variant Prioritization</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cheng</surname>
            <given-names>Yen-Fu</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pontikos</surname>
            <given-names>Nikolas</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Liu</surname>
            <given-names>Cong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Yu-Shan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2876-9079</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Hsu</surname>
            <given-names>Ching</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0560-1019</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Chune</surname>
            <given-names>Yu-Chang</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7290-9446</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Liao</surname>
            <given-names>I-Cheng</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8261-7554</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Hsin</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1929-6449</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Lin</surname>
            <given-names>Yi-Lin</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3869-0479</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Hwu</surname>
            <given-names>Wuh-Liang</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6690-4879</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Ni-Chung</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5011-7499</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Lai</surname>
            <given-names>Feipei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Graduate Institute of Biomedical Electronics and Bioinformatics</institution>
            <institution>National Taiwan University</institution>
            <addr-line>Number 1, Roosevelt Road, Section 4</addr-line>
            <addr-line>Taipei City, 106319</addr-line>
            <country>Taiwan</country>
            <fax>886 2 23628167</fax>
            <phone>886 2 33664924</phone>
            <email>flai@ntu.edu.tw</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7147-8122</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science and Information Engineering</institution>
        <institution>National Taiwan University</institution>
        <addr-line>Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Graduate Institute of Biomedical Electronics and Bioinformatics</institution>
        <institution>National Taiwan University</institution>
        <addr-line>Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Medical Genetics</institution>
        <institution>National Taiwan University Hospital</institution>
        <addr-line>Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Pediatrics</institution>
        <institution>National Taiwan University Hospital</institution>
        <addr-line>Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Feipei Lai <email>flai@ntu.edu.tw</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jan-Dec</season>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>9</month>
        <year>2022</year>
      </pub-date>
      <volume>3</volume>
      <issue>1</issue>
      <elocation-id>e37701</elocation-id>
      <history>
        <date date-type="received">
          <day>4</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>5</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>8</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Yu-Shan Huang, Ching Hsu, Yu-Chang Chune, I-Cheng Liao, Hsin Wang, Yi-Lin Lin, Wuh-Liang Hwu, Ni-Chung Lee, Feipei Lai. Originally published in JMIR Bioinformatics and Biotechnology (https://bioinform.jmir.org), 15.09.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Bioinformatics and Biotechnology, is properly cited. The complete bibliographic information, a link to the original publication on https://bioinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://bioinform.jmir.org/2022/1/e37701" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In recent years, thanks to the rapid development of next-generation sequencing (NGS) technology, an entire human genome can be sequenced in a short period. As a result, NGS technology is now being widely introduced into clinical diagnosis practice, especially for diagnosis of hereditary disorders. Although the exome data of single-nucleotide variant (SNV) can be generated using these approaches, processing the DNA sequence data of a patient requires multiple tools and complex bioinformatics pipelines.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to assist physicians to automatically interpret the genetic variation information generated by NGS in a short period. To determine the true causal variants of a patient with genetic disease, currently, physicians often need to view numerous features on every variant manually and search for literature in different databases to understand the effect of genetic variation.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We constructed a machine learning model for predicting disease-causing variants in exome data. We collected sequencing data from whole-exome sequencing (WES) and gene panel as training set, and then integrated variant annotations from multiple genetic databases for model training. The model built ranked SNVs and output the most possible disease-causing candidates. For model testing, we collected WES data from 108 patients with rare genetic disorders in National Taiwan University Hospital. We applied sequencing data and phenotypic information automatically extracted by a keyword extraction tool from patient’s electronic medical records into our machine learning model.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We succeeded in locating 92.5% (124/134) of the causative variant in the top 10 ranking list among an average of 741 candidate variants per person after filtering. AI Variant Prioritizer was able to assign the target gene to the top rank for around 61.1% (66/108) of the patients, followed by Variant Prioritizer, which assigned it for 44.4% (48/108) of the patients. The cumulative rank result revealed that our AI Variant Prioritizer has the highest accuracy at ranks 1, 5, 10, and 20. It also shows that AI Variant Prioritizer presents better performance than other tools. After adopting the Human Phenotype Ontology (HPO) terms by looking up the databases, the top 10 ranking list can be increased to 93.5% (101/108).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We successfully applied sequencing data from WES and free-text phenotypic information of patient’s disease automatically extracted by the keyword extraction tool for model training and testing. By interpreting our model, we identified which features of variants are important. Besides, we achieved a satisfactory result on finding the target variant in our testing data set. After adopting the HPO terms by looking up the databases, the top 10 ranking list can be increased to 93.5% (101/108). The performance of the model is similar to that of manual analysis, and it has been used to help National Taiwan University Hospital with a genetic diagnosis.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>next-generation sequencing</kwd>
        <kwd>genetic variation analysis</kwd>
        <kwd>machine learning</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>whole-exome sequencing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Modern next-generation sequencing (NGS) technology makes rapid human genome sequencing within a day possible [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Because of its speed and low cost in comparison with the traditional Sanger sequencing method [<xref ref-type="bibr" rid="ref3">3</xref>], NGS is being rapidly introduced into clinical and public health laboratory practice, especially for the diagnosis of hereditary disorders.</p>
        <p>Although NGS has extremely high throughput and could generate huge amounts of genomic data in a short time, interpreting these data and finding the disease-causing candidates among thousands of variants remain a challenge. To determine the true causal variants of a patient with genetic disease, physicians often need to view numerous features on every variant manually and search for literature in different databases to understand the effect of a genetic variation. Another challenge is in finding the genetic variants that have a strong correlation with patient’s phenotype. Physicians often select useful keywords from patient’s electronic medical records (EMRs) manually to search for articles in several genetic databases such as Online Mendelian Inheritance in Man (OMIM) [<xref ref-type="bibr" rid="ref4">4</xref>] and GeneReviews [<xref ref-type="bibr" rid="ref5">5</xref>] to decide whether a variant is correlated with a genetic disease. It is thus a burden for physicians to go through these laborious and time-consuming processes case-by-case, especially when the number of inherited disease–associated germline mutations published per year has increased exponentially in the last decade [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>Nowadays, many studies use machine learning methods to solve numerous problems in genomics and genetics. The field of machine learning promises to enable computers to assist humans in making sense of large, complex data sets. After variant annotation, there is a variant list with hundreds of columns that humans are not capable of interpreting one-by-one. As machine learning significantly surpasses human-level performance, especially with structured data, we consider using a machine learning method to analyze variants from NGS and find the target gene.</p>
        <p>To address these problems, it is important and necessary to have a high-performance method to filter candidate variants from NGS results and immediately find target variants related to a patient’s disease. Recently, many tools such as Exomiser [<xref ref-type="bibr" rid="ref7">7</xref>], DeepPVP [<xref ref-type="bibr" rid="ref8">8</xref>], Xrare [<xref ref-type="bibr" rid="ref9">9</xref>], VarSight [<xref ref-type="bibr" rid="ref10">10</xref>], Phenolyzer [<xref ref-type="bibr" rid="ref11">11</xref>], Fabric GEM [<xref ref-type="bibr" rid="ref12">12</xref>], MOON [<xref ref-type="bibr" rid="ref2">2</xref>], CADD [<xref ref-type="bibr" rid="ref13">13</xref>], and MetaSVM [<xref ref-type="bibr" rid="ref14">14</xref>] have been developed to identify potentially causative variants that are relevant to patient’s phenotype in rare disease diagnosis. Exomiser integrates information including calculated gene-specific phenotype score, variant allele frequency (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), and predicted pathogenicity of several alleles to prioritize disease-causative variants/interactions. Fabric GEM utilizes Bayes factor to prioritize variants with the support of a gene-phenotype score calculated by Phevor [<xref ref-type="bibr" rid="ref15">15</xref>] and variant prioritization result of several tools including ANNOVAR, VAAST, and Phen-Gen. MOON integrates the result of annotation of several variants and prioritization tools to achieve variant prioritization using several kinds of machine learning models. Gene-phenotype scores calculated by Phevor using Human Phenotype Ontology (HPO) terms extracted from electronic health records (EHRs) of patients are also considered by MOON. 
CADD utilizes logistic regression to integrate information including context of surrounding sequence, biological constraints, epigenetic measurements, and result of several variant annotation tools to build a predictive model for variant deleteriousness. MetaSVM [<xref ref-type="bibr" rid="ref14">14</xref>] gathers result of 9 deleteriousness prediction scores including PolyPhen-2 [<xref ref-type="bibr" rid="ref16">16</xref>], SIFT [<xref ref-type="bibr" rid="ref17">17</xref>], MutationTaster [<xref ref-type="bibr" rid="ref18">18</xref>] to build a support vector machine (SVM) deleteriousness predictive model. Although these tools adopt different approaches, including logistic regression and deep neural networks, to prioritize variants, most can only recognize the phenotypes defined in the HPO term [<xref ref-type="bibr" rid="ref19">19</xref>]. In this work, we developed the AI Variant Prioritizer module based on a machine learning approach that can output the rank of single-nucleotide variants (SNVs) and small insertions/deletions (indels) from whole-exome sequencing (WES) data with the input of free-text phenotypic description or EHR.</p>
        <p>In this research, we aimed to implement a website, AI Variant Prioritizer, that uses data from NGS bioinformatics pipelines with machine learning to make a prediction about the most possible disease-causing variants among SNVs and patient’s phenotype. The data generated from NGS pipelines are all structured with annotations from several tools including ANNOVAR, Nirvana, Variant Effect Predictor (VEP), and InterVar and additional information from multiple databases queried by MViewer (Mutation Viewer) [<xref ref-type="bibr" rid="ref20">20</xref>]. To simplify the interpretation process, we integrate the keyword extraction tool to generate the phenotype from EMRs automatically. Our system takes candidate variants filtered by MViewer and patient’s EMRs as its input and outputs a list of SNVs with rank and probability of being disease causing. Instead of checking every variant manually, this system can assist researchers and physicians in focusing on those with higher disease-causing probability and save a lot of time. Moreover, we implement a web application programming interface (API) for our system so that the ranking function could be integrated into MViewer. Thus, physicians are able to interpret the results of genetic variation with a single application instead of adopting numerous services.</p>
      </sec>
      <sec>
        <title>Data Description</title>
        <p>In our research, we focus on patients who have been diagnosed with rare Mendelian diseases. Our data are collected mainly from the rapid exome project of Department of Medical Genetics, National Taiwan University Hospital (NTUH). To build the model with more data, we also applied for several WES data that are deposited in the dbGaP database (project ID 20911). The data we use are the dbGaP accession phs000711.v5.p1 by Baylor Hopkins Center for Mendelian Genomics.</p>
        <p>The conditions under which we collect patients’ sequencing data to meet the requirements of this research are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>Patients who were diagnosed with genetic disorders.</p>
          </list-item>
          <list-item>
            <p>Patients who received WES or targeted panel sequencing and were diagnosed with at least one disease-causing variant.</p>
          </list-item>
          <list-item>
            <p>Patients whose phenotype information is available.</p>
          </list-item>
        </list>
        <p>Our data from NTUH include patient demographics, variant call format (VCF) file output by the NGS bioinformatics pipeline, and phenotype information from electronic medical records. Data from dbGaP also include patient demographics, VCF file, and clinical conditions. All data are deidentified and will not invade patients’ privacy. We include sex in patient demographic information as a feature in our model because some human genetic disorders are sex linked. Sex-linked diseases are caused by mutations in genes on X or Y chromosomes and passed down through families.</p>
      </sec>
      <sec>
        <title>Variant Call Format File</title>
        <p>As the end product of the NGS bioinformatics pipeline, the VCF is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions, and structural variants. The format was developed for the 1000 Genomes Project and has also been widely adopted by other projects. Every VCF file consists of 2 parts: a header section and a data section. The header contains metadata about the tags and annotations in the data part. It can also be used to provide information related to the history of the data and file. The last line in the header contains the column headings for the data part. The data section is tab separated into 9 columns and reports a mutation for each row. Columns include CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, and FORMAT.</p>
      </sec>
      <sec>
        <title>Phenotype Information</title>
        <p>For the data from NTUH, we extract patient’s phenotypic information from clinicians’ history summary. It mainly records a brief summary of patient’s illness, clinical diagnosis, and the reason(s) why each patient was admitted. We also collect the phenotype keywords provided by doctors based on the symptom of each patient for model validation. For the data from dbGaP, because EHRs are not available, we will use the clinical condition of the patient instead. For the clinical condition that can be found in OMIM databases, we will extract the corresponding description of phenotypes as the phenotypic information to be used in our research.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Workflow</title>
        <sec>
          <title>Overview</title>
          <p><xref rid="figure1" ref-type="fig">Figure 1</xref> shows the workflow of our research. We collected VCF of each patient from WES and panel sequencing and then annotated the variants using several tools. After variant annotation, we used our in-house software (MViewer [<xref ref-type="bibr" rid="ref20">20</xref>]) to query additional external databases and filter for candidate variants. We then used the gene name of these candidate variants and keywords extracted by keyword extraction tools from EMRs to query Variant Prioritizer [<xref ref-type="bibr" rid="ref21">21</xref>]. The gene similarity scores generated by Variant Prioritizer and columns of annotated variants were used as features to train a machine learning model. This model ranks each variant that represents its disease-causing probability. We will demonstrate the details of each step in the following sections.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>The workflow of research. EMR: electronic medical record; indel: insertion/deletion; MViewer: Mutation Viewer; SNV: single-nucleotide variant; VCF: variant call format.</p>
            </caption>
            <graphic xlink:href="bioinform_v3i1e37701_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Variant Annotation</title>
          <p>We collected each patient’s NGS sequencing data in the VCF file and got annotations from several tools, including ANNOVAR [<xref ref-type="bibr" rid="ref22">22</xref>], VEP [<xref ref-type="bibr" rid="ref23">23</xref>], Nirvana [<xref ref-type="bibr" rid="ref24">24</xref>], and InterVar [<xref ref-type="bibr" rid="ref25">25</xref>]. For additional information that the aforementioned tools will not provide, we used software to import some public data sources, including ClinVar [<xref ref-type="bibr" rid="ref26">26</xref>], Human Gene Mutation Database (HGMD) [<xref ref-type="bibr" rid="ref27">27</xref>], and Taiwan Biobank [<xref ref-type="bibr" rid="ref28">28</xref>]. A detailed description of these annotation fields is summarized in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
          <boxed-text id="box1" position="float">
            <title>Description of annotation fields.</title>
            <p>
              <bold>Allele Frequency</bold>
            </p>
            <p>This describes the fraction of gene copies of a particular allele in a defined population. Allele frequency is calculated by dividing the number of copies of a particular allele in a population by the total number of all alleles for that gene in a population. It refers to how common an allele is in a population.</p>
            <p>
              <bold>Functional Prediction Score</bold>
            </p>
            <p>A range of scoring algorithms with capability to predict the potential deleteriousness of variants based on different information in them, such as their sequence homology, protein structure, and evolutionary conservation. These scoring methods include function prediction scores, conservation scores, and ensemble scores.</p>
            <p>
              <bold>Pathogenicity</bold>
            </p>
            <p>Clinical significance variants reported in 2 public databases, ClinVar and Human Gene Mutation Database (HGMD), that store information on gene mutation(s) related to human-inherited disease. Both classify variants as disease causing or disease associated by manual curation.</p>
            <p>
              <bold>Clinical Interpretation</bold>
            </p>
            <p>The American College of Medical Genetics and Genomics (ACMG) and the Association for Molecular Pathology (AMP) published standards and guidelines for the clinical interpretation of sequence variants with respect to human diseases on the basis of 28 criteria [<xref ref-type="bibr" rid="ref29">29</xref>]. These criteria are as follows: the criteria (16 overall) for classifying variants as pathogenic or likely pathogenic are very strong (PVS1), strong (PS1-PS4), moderate (PM1-PM6), or supporting (PP1-PP5), whereas the criteria (12 overall) for classifying variants as benign or likely benign are standalone (BA1), strong (BS1-BS4), or supporting (BP1-BP7).</p>
            <p>
              <bold>Gene-Level Constraint</bold>
            </p>
            <p>Constraint on gene expression levels has been shown to influence patterns of genetic variation within humans [<xref ref-type="bibr" rid="ref30">30</xref>]. For example, some genes are unusually depleted for loss of function and are thought to be constrained with respect to their expression. The Genome Aggregation Database (gnomAD) provides predicted constraint metrics track set that contains metrics of pathogenicity per gene as predicted and identifies genes subject to strong selection against various classes of mutation. These include several subtracks of constraint metrics calculated at gene, transcript, and transcript region levels.</p>
            <p>
              <bold>Disease Inheritance</bold>
            </p>
            <p>Patterns of inheritance that a trait or disorder associated with a variant can be passed down through families, such as autosomal dominant, autosomal recessive, X-linked, and mitochondrial inheritance. We used the patterns defined in OMIM (Online Mendelian Inheritance in Man) as our data.</p>
            <p>
              <bold>Others</bold>
            </p>
            <p>Additional information about genetic variants such as the gene name, genotype, and the functional consequence on the different transcripts for a gene or in proximal regulatory regions.</p>
          </boxed-text>
        </sec>
        <sec>
          <title>Variant Filtering</title>
          <p>There are on average 40,000 variants per proband in WES data. However, most of them are benign and not related to the symptoms. Only a small number of these variants are likely to be deleterious or relevant to the patient’s disease. In a standard clinical analysis process, physicians only focus on variants that might be pathogenic or unknown. As our model aims to assist researchers and physicians with their clinical exome reading, reducing the number of variants and focusing on the variants that are more likely to be responsible for the disease are necessary.</p>
          <p>For the purpose of generating candidate variants, we used the filter provided by MViewer to remove the variants that are not likely to be deleterious. The filters and criteria are listed in <xref ref-type="table" rid="table1">Table 1</xref>. For filters that contain more than 1 column, if a variant meets any of their criteria, it will remain in the data. We got approximately 700 SNVs per patient after variant filtering.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Filter criteria.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="190"/>
              <col width="320"/>
              <col width="490"/>
              <thead>
                <tr valign="top">
                  <td>Filter</td>
                  <td>Column</td>
                  <td>Criteria</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Max allele frequency</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Max Allele Frequency</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>≤0.01 (include no data)</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Nonsynonymous missense mutation</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>ExonicFunc.refgene</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“nonsynonymous”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Stop gain</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                      <list-item>
                        <p>ExonicFunc.refgene</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“stop_gained”</p>
                      </list-item>
                      <list-item>
                        <p>“stopgain”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Splice</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                      <list-item>
                        <p>Func.refgene</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“splice_region_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“splice_acceptor_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“splice_donor_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“splicing”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Frameshift</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                      <list-item>
                        <p>ExonicFunc.refgene</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“frameshift_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“feature_truncation”</p>
                      </list-item>
                      <list-item>
                        <p>“feature_elongation”</p>
                      </list-item>
                      <list-item>
                        <p>“frameshift”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Initial codon</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“start_lost”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Deletion</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Type</p>
                      </list-item>
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                      <list-item>
                        <p>ExonicFunc.refgene</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“deletion”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Insertion</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Type</p>
                      </list-item>
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                      <list-item>
                        <p>ExonicFunc.refgene</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“insertion”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Inframe deletion</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                      <list-item>
                        <p>ExonicFunc.refgene</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“inframe_deletion”</p>
                      </list-item>
                      <list-item>
                        <p>“nonframeshift deletion”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Exon/splice site</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Func.refgene</p>
                      </list-item>
                      <list-item>
                        <p>Consequence</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>“exonic”</p>
                      </list-item>
                      <list-item>
                        <p>“splicing”</p>
                      </list-item>
                      <list-item>
                        <p>“coding_sequence_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“frameshift_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“incomplete_terminal_codon_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“inframe_deletion”</p>
                      </list-item>
                      <list-item>
                        <p>“inframe_insertion”</p>
                      </list-item>
                      <list-item>
                        <p>“missense_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“splice_acceptor_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“splice_donor_variant”</p>
                      </list-item>
                      <list-item>
                        <p>“splice_region_variant”</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Phenotype Extraction</title>
          <sec>
            <title>Overview</title>
            <p>The phenotype information used in this research is from clinicians’ history summary. The records were all free text and the length of texts varied from less than 10 to more than 300 words. In the clinical analysis process, it is time consuming for physicians to go through the medical records and identify the phenotype keywords manually. To solve this problem, we used several keyword extraction tools to automatically generate keywords related to phenotype from free-text medical records. The keyword extraction tools applied in our research are listed in the following sections.</p>
          </sec>
          <sec>
            <title>MetaMap</title>
            <p>MetaMap [<xref ref-type="bibr" rid="ref31">31</xref>] is a widely used application providing access to the concepts in the Unified Medical Language System (UMLS) Metathesaurus [<xref ref-type="bibr" rid="ref32">32</xref>]. The UMLS Metathesaurus is a compilation of names, relationships, and associated information from a variety of biomedical naming systems representing different views of biomedical practice or research. It comprises over 1 million biomedical concepts and 5 million concept names [<xref ref-type="bibr" rid="ref33">33</xref>]. MetaMap is able to map every word in the texts to UMLS concepts, but we just wanted to focus on those associated with phenotypes and diseases. Thus, we extracted the words that are classified as the semantic types of the following: (1) injury or poisoning, (2) cell or molecular dysfunction, (3) genetic function, (4) disease or syndrome, (5) sign or symptom, (6) tissue.</p>
          </sec>
          <sec>
            <title>Doc2Hpo</title>
            <p>Doc2Hpo [<xref ref-type="bibr" rid="ref34">34</xref>] is a web application using natural language processing (NLP) techniques to parse clinical note and get the phenotype concept curation as the HPO term. There is a parsing engine that will automatically recognize the phenotype concepts from the input. Doc2Hpo applies an algorithm called NegBio for negation detection in the input data. After that, there are several NLP engines responsible for HPO concept extraction. We used 3 of these engines and compared the performance of each of them. The first NLP engine is a string-based method that leverages the algorithm for concept extraction. The second engine is the online NCBO Annotator [<xref ref-type="bibr" rid="ref35">35</xref>] API for HPO concept recognition. The last engine we adopt is MetaMap Lite, which is a fast version of MetaMap that provides near–real-time named entity recognition. The MetaMap Lite engine first identifies clinical terms in the texts and maps them to standard UMLS concepts. The UMLS concepts will then be further mapped to HPO concepts. Results generated by Doc2Hpo are HPO terms, whereas keywords extracted by MetaMap are nonHPO terms.</p>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Phenotype-Gene Similarity Score</title>
        <p>Another method to construct the connections between genes and keywords is using the Okapi BM25 ranking function. Okapi BM25 is usually used by search engines, such as Google and Bing, to rank matching documents according to their relevance to a given search. One of the most prominent instantiations of the function is as the following equation:</p>
        <disp-formula>
          <graphic xlink:href="bioinform_v3i1e37701_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where score(<italic>D</italic>, <italic>Q</italic>) represents the Okapi BM25 score of a document <italic>D</italic> when given a query <italic>Q</italic>, containing keywords <italic>q</italic>1, <italic>q</italic>2,...,<italic>qn</italic>; <italic>f</italic>(<italic>qi</italic>, <italic>D</italic>) is <italic>qi</italic>’s term frequency in the document <italic>D</italic>; |<italic>D</italic>| is the length of document <italic>D</italic> in words; avgdl is the average document length among all documents; <italic>k</italic>1 and <italic>b</italic> are constants (=1.2 and 0.75, respectively); and IDF(<italic>qi</italic>) is the inverse document frequency (IDF) weight of the query term <italic>qi</italic> and is usually defined as:</p>
        <disp-formula>
        IDF(<italic>qi</italic>) = ln{[<italic>N</italic> – <italic>n</italic>(<italic>qi</italic>) + 0.5]/[<italic>n</italic>(<italic>qi</italic>) + 0.5] + 1}
        </disp-formula>
        <p>where <italic>N</italic> is the total number of documents and <italic>n</italic>(<italic>qi</italic>) is the number of documents containing the keyword <italic>qi</italic>.</p>
        <p>In this research, we propose an idea using gene description from OMIM and GeneReviews as documents and keywords as query to implement the Okapi BM25 ranking function. Therefore, we can use the Okapi BM25 score to represent the relationship between gene description and keywords. The higher score that gene description gets from keywords indicates stronger connection between that gene and keywords. Rank values were based on the Okapi BM25 ranking function mentioned before with some other parameters. Compared with the Okapi BM25 regular formula, rank value replaces the IDF function with Robertson-Spärck-Jones weight [<xref ref-type="bibr" rid="ref36">36</xref>]. The IDF function is a measure of how much information the word provides, that is, whether the word is common or rare across all documents. For example, the term “the” is very common in every document, so term frequency will be inclined to falsely highlight the documents that happen to use the word “the” more frequently. Hence, the IDF function is dedicated to reducing the weight of words that appear very frequently among all documents. In contrast to the regular IDF function, the Robertson-Spärck-Jones weight adds relevant parameters of documents and increases the precision of rank score.</p>
        <p>We get the phenotype-gene similarity score of each SNV from Variant Prioritizer, a text mining tool that outputs the rank and score of genes by entering symptoms as keywords. Variant Prioritizer uses the Okapi BM25 ranking function [<xref ref-type="bibr" rid="ref37">37</xref>] to construct the connections between genes and keywords. Gene descriptions from OMIM, GeneReviews, Entrez Gene [<xref ref-type="bibr" rid="ref38">38</xref>], and PubTator [<xref ref-type="bibr" rid="ref39">39</xref>] serve as data sources and keywords as query to implement the Okapi BM25 score using the full-text search method. It returns a column called RANK that includes ordinal value from 0 to 1000. The RANK score is based on the following formula:</p>
        <disp-formula>
          <graphic xlink:href="bioinform_v3i1e37701_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where ω is the Robertson-Spärck-Jones weight [<xref ref-type="bibr" rid="ref36">36</xref>], which is defined as ω = log [(<italic>r</italic> + 0.5)∙(<italic>N</italic> – <italic>n</italic> – <italic>R</italic> + <italic>r</italic> + 0.5)]/[(<italic>R</italic> – <italic>r</italic> + 0.5)∙(<italic>n</italic> – <italic>r</italic> + 0.5)], in which <italic>R</italic> is the number of known relevant documents and <italic>r</italic> is the number of these containing the term; <italic>tf</italic> is the frequency of the word in the property queried within an article; <italic>qtf</italic> is the frequency of the term in the query; and <italic>K</italic> is defined as follows:</p>
        <disp-formula><italic>K</italic> = <italic>k</italic><sub>1</sub>[(1 – <italic>b</italic>) + <italic>b</italic>(<italic>dl</italic>/avgdl)]
        </disp-formula>
        <p>where <italic>dl</italic> is the property length, in word occurrence; avgdl is the average length of the property being queried, in word occurrence; and <italic>k</italic><sub>1</sub>, <italic>b</italic>, and <italic>k</italic><sub>3</sub> are constants (=1.2, 0.75, and 8.0, respectively).</p>
        <p>We employed the Variant Prioritizer API to get the RANK value from each data source as gene similarity score to represent the association between each SNV and the extracted keywords. We kept the maximum and minimum scores of rank values (4 overall) as 2 separate features for model building.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This retrospective cohort study was approved by the Institutional Review Board (IRB) of the National Taiwan University Hospital (IRB number: 201710066RINB). We confirm that all experiments were performed in accordance with relevant guidelines and regulations. The data retrieved from EHRs were deidentified and could not be linked to the patients’ identity by the research team. The need for written informed consent was waived and confirmed by the National Taiwan University Hospital IRB (201710066RINB) because this was a retrospective cohort study with deidentified data. This regulation complies with the Health Insurance Portability and Accountability Act (HIPAA), which places no restrictions on the use or disclosure of deidentified health information.</p>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <sec>
          <title>Overview of Steps</title>
          <p>After variant annotation of the VCF file, we preprocessed our data into a model-acceptable format. Data preprocessing is an extremely important step in machine learning because the quality of data can directly affect the ability of a model to learn. It includes various operations and each operation aims to help machine learning build better predictive models. The data preprocessing operations used in this research are explained in the following sections.</p>
        </sec>
        <sec>
          <title>Missing Value Handling</title>
          <p>In the real world, data usually have missing values. As most machine learning methods cannot deal with null values, it is pivotal to identify and correctly handle the missing values. Basically, the missing values can be handled using various techniques such as deletion or imputation [<xref ref-type="bibr" rid="ref40">40</xref>]. Deletion removes all data for an observation that has 1 or more missing values. However, if there are many columns with missing values, then deletion will result in the lack of data. Therefore, for some columns we used imputation by substituting the missing values in our data set with mean and for some columns we just simply replaced the missing value with a valid value such as 0.</p>
        </sec>
        <sec>
          <title>One Hot Encoding</title>
          <p>Many machine learning algorithms cannot operate on categorical data directly. They require all input features to be numeric. Basically, categorical data contain label values rather than numeric values. As a consequence, categorical data must be converted into a numerical form so that they can be used in the machine learning model. One hot encoding is a widespread approach for dealing with categorical data. One hot encoding transforms a categorical column to a multidimensional vector. It creates new columns, indicating the presence of each possible value from the original data.</p>
          <p>For example, in the genotype variable, there are 3 categories: homozygous (hom), heterozygous (het), and hemizygous (hem). Therefore, 3 binary variables [hom, het, hem] are needed. If genotype of a variant is heterozygous, we use [0,1,0] to represent it.</p>
        </sec>
        <sec>
          <title>Data Normalization</title>
          <p>For continuous data, there are a few with different ranges. If we apply features in very different ranges to some machine learning models such as logistic regression, the feature with broader range will intrinsically influence the result more owing to its larger value. However, this does not necessarily mean that this feature is more important as a predictor. Therefore, we used normalization techniques as a solution to overcome this problem. Normalization is the rescaling of the data from the original range so that all values are within the range of 0 and 1. We rescale all continuous values by min-max normalization. The general formula is as follows:</p>
          <disp-formula><italic>X</italic>norm = (<italic>X – X</italic>min)<italic>/</italic>(<italic>X</italic>max <italic>– X</italic>min)
          </disp-formula>
          <p>where <italic>X</italic> is the original value and <italic>X</italic>norm is the normalized value. This will make the maximal value map to 1 and the minimal value map to 0. In addition to the aforesaid data preprocessing techniques, we handled different data types in different ways and created some new features for model building. In the following sections, we elaborate on each data type preprocessing and combine them in the end.</p>
        </sec>
        <sec>
          <title>Functional Prediction Score</title>
          <p>Functional prediction scores including SIFT [<xref ref-type="bibr" rid="ref17">17</xref>], PolyPhen2 HDIV [<xref ref-type="bibr" rid="ref16">16</xref>], PolyPhen2 HVAR [<xref ref-type="bibr" rid="ref16">16</xref>], LRT [<xref ref-type="bibr" rid="ref41">41</xref>], MutationTaster [<xref ref-type="bibr" rid="ref18">18</xref>], MutationAssessor [<xref ref-type="bibr" rid="ref42">42</xref>], FATHMM [<xref ref-type="bibr" rid="ref43">43</xref>], PROVEAN [<xref ref-type="bibr" rid="ref44">44</xref>], MetaSVM [<xref ref-type="bibr" rid="ref14">14</xref>], MetaLR [<xref ref-type="bibr" rid="ref14">14</xref>], M-CAP [<xref ref-type="bibr" rid="ref45">45</xref>], CADD [<xref ref-type="bibr" rid="ref13">13</xref>], GERP++ [<xref ref-type="bibr" rid="ref46">46</xref>], DANN [<xref ref-type="bibr" rid="ref47">47</xref>], fathmm-MKL [<xref ref-type="bibr" rid="ref48">48</xref>], GenoCanyon [<xref ref-type="bibr" rid="ref49">49</xref>], fitCons [<xref ref-type="bibr" rid="ref50">50</xref>], PhyloP [<xref ref-type="bibr" rid="ref51">51</xref>], PhastCons [<xref ref-type="bibr" rid="ref52">52</xref>], and SiPhy [<xref ref-type="bibr" rid="ref53">53</xref>] were from ANNOVAR. We used converted rank scores provided by ANNOVAR instead of the original score because all these scores are always within the range of 0 and 1. Besides, converted rank scores from different algorithms are monotonic in the same direction. That is, a higher score indicates that the variant is more likely to be damaging [<xref ref-type="bibr" rid="ref54">54</xref>]. For splice site prediction, we imported the MaxEntScan score using the VEP plugin. We defined a new column called MaxEntScan significance. The value is 1 when the value of MaxEntScan alt is smaller than 3 and MaxEntScan variation is smaller than 30%; otherwise the value is 0. We used clinical significance reported in ClinVar and computed rank score from the HGMD. 
The HGMD computed rank score is a probability of pathogenicity between 0 and 1, with 1 being most likely disease causing compared with other HGMD entries.</p>
        </sec>
        <sec>
          <title>Clinical Interpretation</title>
          <p>We employed clinical interpretation of each genetic variant based on the American College of Medical Genetics and Genomics/Association for Molecular Pathology (ACMG/AMP) 2015 guideline, which is generated by InterVar. We calculated the ACMG score developed by Xrare to represent its overall pathogenicity. The ACMG score is a weighted sum score based on multiple evidence (n=14) with the following weights for each term: PVS1:6, PS1:4, PM1:2, PM2:2, PM4:2, PM5:2, PP2:1, PP3:1, BA1:9, BS1:3, BS2:3, BP3:1, BP4:1, BP7:2 [<xref ref-type="bibr" rid="ref9">9</xref>]. We collected gene-level constraint features including pLI, pRec, syn_z, and mis_z from the Genome Aggregation Database (gnomAD). We used the patterns of inheritance defined in OMIM as our data. For variants that contain multiple patterns, we calculated the occurrences of each pattern and stored it as a feature. We also get some additional information about each variant from ANNOVAR such as genotype, regions that a variant hits, and read depths. The quality of each variant is also collected from the VCF file. As the genotype annotated by ANNOVAR does not contain hemizygous alleles, we replaced the genotype feature of all male patients’ chromosome X with hemizygous alleles. In addition, we collected functional consequence on the different transcripts for a gene or in proximal regulatory regions using Nirvana.</p>
        </sec>
        <sec>
          <title>Labels</title>
          <p>The goal of our research was to identify the disease-causing variants with SNVs (ie, we classify a variant as disease causing or not). As machine learning algorithms learn how to assign a class label to a test case from examples, it is necessary to assign a class label to all input training sets. We used the 0/1 label to represent whether a variant is disease causing or not. If a variant is causative, we assigned label 1 to it; otherwise the label is 0. Details about all the features used in our model are presented in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        </sec>
      </sec>
      <sec>
        <title>Feature Selection</title>
        <p>After data preprocessing, we got 94 features for each variant. To reduce the high dimension of the input data set while retaining the discriminatory information for classification problems, we applied univariate feature selection techniques from scikit-learn [<xref ref-type="bibr" rid="ref55">55</xref>] packages to identify the relevant variables in a data set and eliminate the useless variables. Feature selection helps to reduce the noise in the data set and lets the model focus on the relevant signals.</p>
        <p>There are several scoring functions provided by scikit-learn univariate feature selection modules. We used mutual information classifier to select the most relevant variables. Mutual information [<xref ref-type="bibr" rid="ref56">56</xref>] between 2 random variables is a nonnegative value, which measures the general dependence of variables without making any assumptions about the nature of their underlying relationships [<xref ref-type="bibr" rid="ref57">57</xref>]. The mutual information between 2 discrete random variables X and Y is defined as follows:</p>
        <disp-formula>
          <graphic xlink:href="bioinform_v3i1e37701_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>p</italic>(<italic>x</italic>, <italic>y</italic>) is the joint probability density function of <italic>X</italic> and <italic>Y</italic>, and <italic>p</italic>(<italic>x</italic>) and <italic>p</italic>(<italic>y</italic>) are the marginal density functions. The mutual information determines the similarity between the joint distribution <italic>p</italic>(<italic>x</italic>, <italic>y</italic>) and the products of the factored marginal distributions. The larger the value, the greater the relationship between the 2 variables. The calculated value is equal to 0 if and only if the 2 variables are independent.</p>
        <p>We performed the feature selection process using only the training set to determine the relevant variable. Further, the number of features we selected is based on model evaluation with 10-fold cross validation.</p>
      </sec>
      <sec>
        <title>Building Model</title>
        <p>To construct a model by machine learning algorithm, we split the data into 2 groups. As our model aims to assist physicians with their clinical exome data interpretation process, the exome data from the dbGaP database and the targeted gene panel sequencing data from NTUH were set as training set, and the WES data from NTUH were set as testing data, which can only be used for model evaluation. The external validation set consisted of the 90 most recent NTUH WES data, which help to make sure that our model can make predictions in future clinical use. Details about the training and testing sets are listed in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <p>To build the machine learning model, we implemented the random forests algorithm [<xref ref-type="bibr" rid="ref58">58</xref>] provided by scikit-learn packages. The selection of hyperparameters is based on a grid search with 10-fold cross validation. Random forest was first proposed by Leo Breiman in 2001 [<xref ref-type="bibr" rid="ref58">58</xref>]. It is an ensemble classifier that evolves from decision trees. Actually, random forests are a combination of decision trees such that each tree depends on the values of a random vector sampled independently, with the same distribution for all trees in the forest [<xref ref-type="bibr" rid="ref59">59</xref>]. A forest of trees is grown as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>The training set is a bootstrap sample from the original training set.</p>
          </list-item>
          <list-item>
            <p>The number of trees to build and the number of variables randomly sampled as candidates at each split m-try are set by the user, where m-try is less than the total number of variables.</p>
          </list-item>
          <list-item>
            <p>At each node, m-try variables are selected at random, and the node is split on the best split point among m-try. This process iterates until the tree grows to its maximal depth.</p>
          </list-item>
          <list-item>
            <p>For test case prediction, as a test vector <bold>x</bold> is put down at each tree, it is assigned the average of <bold>y</bold> values at the node it stops at. The average of these over all trees in the forest is the predicted value for <bold>x</bold>. The predicted value for classification is the class getting the plurality of the forest votes.</p>
          </list-item>
        </list>
        <p>The function we used to measure the quality of a split is Gini impurity. Gini impurity is the probability of incorrectly classifying a randomly chosen element in the data set if it were randomly labeled according to the class distribution in the data set [<xref ref-type="bibr" rid="ref60">60</xref>]. In decision tree learning it is defined as <inline-graphic xlink:href="bioinform_v3i1e37701_fig15.png" xlink:type="simple" mimetype="image"/>, where <italic>c</italic> is the number of classes and <italic>p</italic>(<italic>i</italic>|<italic>t</italic>) is the probability of randomly picking an object of class <italic>i</italic> at node <italic>t</italic>. The optimal split from a root node when training a decision tree is chosen by maximizing the Gini gain, which is calculated by subtracting the weighted impurities of the branches from the original impurity.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The training, testing, and external validation sets used in this study.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="280"/>
            <col width="240"/>
            <col width="240"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Data</td>
                <td>Training set</td>
                <td>Testing set</td>
                <td>External validation set</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Source</td>
                <td>dbGaP<sup>a</sup>, NTUH<sup>b</sup> panel</td>
                <td>NTUH WES<sup>c</sup></td>
                <td>New NTUH WES</td>
              </tr>
              <tr valign="top">
                <td>Patients, n</td>
                <td>381</td>
                <td>108</td>
                <td>90</td>
              </tr>
              <tr valign="top">
                <td>Filtered variants, n</td>
                <td>125,693</td>
                <td>80,083</td>
                <td>109,857</td>
              </tr>
              <tr valign="top">
                <td>Causative variants, n</td>
                <td>478</td>
                <td>134</td>
                <td>100</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>dbGaP: Database of Genotypes and Phenotypes.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>NTUH: National Taiwan University Hospital.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>WES: whole-exome sequencing.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Performance Evaluation</title>
        <p>To evaluate our model performance of true causative variant prioritization, we used the ranking statistics mentioned in VarSight. After we applied the binary classification process to all variants, we obtained a probability for each variant that represents the probability of this variant being disease causing. We ranked the variants for each patient from the highest to lowest probability and quantified the percentage of the target variants that were ranked in the top 1, 5, 10, and 20.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Feature Selection</title>
        <p>For the feature selection, we used univariate feature selection based on the SelectPercentile method in the scikit-learn package. The classifier we chose is the mutual information classifier. Only the training set was used for selecting the most relevant features. Further, we applied 10-fold cross validation to decide the number of features for model training. In <xref rid="figure2" ref-type="fig">Figure 2</xref>, we present the top 10 accuracy on 10-fold cross validation using different percentages of features. As using 60% of features achieves the highest accuracy, 56 features (60% of total 94 features) with the highest estimated mutual information were selected for the final model building.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The top 10 accuracy on 10-fold cross validation using different percentage of features.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Model Performance</title>
        <p>We evaluated the model with our testing set. As mentioned in <xref ref-type="table" rid="table2">Table 2</xref>, the testing set comprised 108 patients who received WES with at least one disease-causing variant diagnosed by doctors. <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> presents detailed information about their causative variants, keywords, and the corresponding HPO term. The keywords and HPO term are determined by doctors based on the phenotype of each patient.</p>
      </sec>
      <sec>
        <title>Prediction With Different Keyword Extraction Tools</title>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows the percentage distribution of the ranking of target variants and <xref rid="figure4" ref-type="fig">Figure 4</xref> shows the cumulative rank result of models using different keyword extraction tools. When using tools to extract phenotypes from abstracts, our model can assign the target variants to the top rank for over 40% (60/134, 44.8%) of the total variants. The top 10 accuracies of models are around 90% (124/134, 92.5%), irrespective of the keyword extraction tool used. Compared with the keywords provided by professional doctors, applying tools to extract keywords had lower top 1 accuracy but comparable top 10 accuracy. This indicated that in most cases our model can successfully rank the true causative variants in the front of the variant lists and the rank is slightly influenced by the input keywords.</p>
        <p>We built a random forest model based on the method described in the previous section and evaluated it with our testing set based on different keyword extraction tools. We succeeded in locating 92.5% (124/134) of the causative variants in the top 10 ranking list among an average of 741 candidate variants per person after filtering. The performance of the model is similar to that of manual analysis, and it has been used to help National Taiwan University Hospital with genetic diagnosis.</p>
        <p><xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure4" ref-type="fig">4</xref> show the percentage distribution of the ranking of target variants and the cumulative rank result of models using different keyword extraction tools, respectively. When using tools to extract phenotypes from abstracts, our model can assign the target variants to the top rank for over 40% (60/134, 44.8%) of the total variants. The top 10 accuracies of models are around 90% (124/134, 92.5%), irrespective of the keyword extraction tool used. Compared with the keywords provided by professional doctors, applying tools to extract keywords has lower top 1 accuracy but comparable top 10 accuracy. This indicates that in most cases our model can successfully rank the true causative variants at the front of the variant lists and that the rank is only slightly influenced by the input keywords.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Percentage distribution of ranks.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Cumulative percentage distribution of ranks. NCBO: National Center for Biomedical Ontology.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Other Machine Learning Methods</title>
        <p>We also evaluated other machine learning methods and compared their performance with random forest. These methods include logistic regression, Gaussian naive Bayes, SVM with RBF kernel, and gradient boosted decision trees. The selection of hyperparameters for each algorithm was based on grid search with 10-fold cross validation. We used MetaMap as the keyword extraction tool and our testing data to test the performance of each method. The percentage distribution of the ranking of target variants by each machine learning method and the cumulative rank result of each model are shown in <xref rid="figure5" ref-type="fig">Figures 5</xref> and <xref rid="figure6" ref-type="fig">6</xref>, respectively. As random forest got the highest top 10 accuracy, we finally chose random forest as our machine learning algorithm.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Percentage distribution of ranks. GBDT: gradient boosting decision tree; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Cumulative percentage distribution of ranks. GBDT: gradient boosting decision tree; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We have implemented a website, AI Variant Prioritizer, which uses data from NGS bioinformatics pipelines with machine learning to predict the most probable disease-causing variants among SNVs based on the patient’s phenotype data. This system can assist researchers and physicians by focusing on the variants with higher disease-causing probability and reducing the average turnaround time (by 1 day) of the entire WES pipeline, from DNA extraction to clinical diagnosis. Moreover, we have implemented a web API for our system so that the ranking function could be integrated into MViewer. Thus, physicians can interpret the results of genetic variation with a single application instead of adopting numerous services.</p>
        <p>For comparison, we used our testing data to run several prioritization tools including AMELIE [<xref ref-type="bibr" rid="ref61">61</xref>], VarElect [<xref ref-type="bibr" rid="ref62">62</xref>], Exomiser, Phenolyzer, and Variant Prioritizer. As AMELIE and Exomiser can only accept phenotypes defined in HPO terms, we entered HPO terms determined by doctors as their input. Phenolyzer can identify both disease terms and HPO terms. However, if the terms do not match in their databases, it will not return any record. Hence, we also used HPO terms as input for Phenolyzer. VarElect, Variant Prioritizer, and our model can identify free-text descriptions. Therefore, we entered the keywords provided by doctors as input for testing. AMELIE, VarElect, and Variant Prioritizer only prioritize the gene list instead of the variant list. Hence, we evaluated the result for gene-based prioritization instead of variant-based prioritization. That is, for each patient, if the tools prioritize the target gene in the top 1, 5, 10, 20, 50, and 100 of our filtered gene lists, this patient will be counted. All the tools were run with the default settings provided on their websites.</p>
        <p><xref rid="figure7" ref-type="fig">Figures 7</xref> and <xref rid="figure8" ref-type="fig">8</xref> show the percentage and cumulative percentage distribution of the target gene ranking for each tool, respectively. From <xref rid="figure8" ref-type="fig">Figure 8</xref>, we can see that AI Variant Prioritizer is able to assign the target gene to the top rank for 61.1% (66/108) of the patients, followed by Variant Prioritizer (48/108, 44.4%). It also shows the cumulative rank result, which reveals that our AI Variant Prioritizer has the highest accuracy at ranks 1, 5, 10, and 20. Further, AI Variant Prioritizer shows better performance than other tools. After adopting the HPO terms by looking up the databases, the top 10 ranking list can be increased to 93.5% (101/108).</p>
        <p>In comparison with extraction of phenotypic features from SNOMED through manual mapping of HPO terms to SNOMED Clinical Terms (SNOMED CT) [<xref ref-type="bibr" rid="ref63">63</xref>], our automation approach explores various phenotypic feature extraction tools and focuses on rare disease interpretation. We have also looked into several AI-driven variant prioritization approaches published in the last 3 years, including Fabric GEM [<xref ref-type="bibr" rid="ref12">12</xref>], MOON [<xref ref-type="bibr" rid="ref2">2</xref>], and Exomiser. There are several differences between our approach and each of these approaches, including the algorithms used to build the prioritization model, the features considered, and databases integrated. However, the major difference of our approach from others is the method used to turn the relationships between genes and phenotypes into numerical values, which makes way for later prediction. Fabric GEM and MOON utilize Phevor [<xref ref-type="bibr" rid="ref15">15</xref>] to turn phenotype-gene relationship into numerical values, whereas Exomiser uses PhenoDigm [<xref ref-type="bibr" rid="ref64">64</xref>] to achieve this goal.</p>
        <p>Both Phevor and PhenoDigm construct new methods that bridge HPO and other ontologies to discover more gene-disease associations. Phevor gathers all correlations of diseases and genes provided by HPO and Gene Ontology (GO) and constructs several networks (graphs) and distributes decreasing weights along the paths found. The sum of weights on the specific gene node represents the correlation score of the gene with the corresponding HPO term. PhenoDigm utilizes OWLSim [<xref ref-type="bibr" rid="ref65">65</xref>] to calculate the similarity among different phenotypes in different ontologies and uses similarity to estimate the magnitude of correlation of given HPO terms and different genes. By contrast, Variant Prioritizer used in our approach extracts the phenotype-gene relationship from a different kind of knowledge source: free text of databases. We make a simple comparison of these approaches in <xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref>.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Percentage distribution of ranks. AI: artificial intelligence.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Cumulative percentage distribution of ranks. AI: artificial intelligence.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The comparison among AI Variant Prioritizer, Fabric GEM, MOON, and Exomiser.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Tool</td>
                <td>AI<sup>a</sup> Variant Prioritizer</td>
                <td>Fabric GEM</td>
                <td>MOON</td>
                <td>Exomiser</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Variant scoring algorithm</td>
                <td>Random forest</td>
                <td>Bayes factor</td>
                <td>Decision trees, Bayesian models, neural networks</td>
                <td>Rule based</td>
              </tr>
              <tr valign="top">
                <td>Phenotype-gene score</td>
                <td>Variant Prioritizer</td>
                <td>Phevor</td>
                <td>Phevor</td>
                <td>PhenoDigm</td>
              </tr>
              <tr valign="top">
                <td>Phenotype input format</td>
                <td>Plain texts</td>
                <td>HPO<sup>b</sup> terms</td>
                <td>HPO terms extracted from electronic health record</td>
                <td>HPO terms</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>AI: artificial intelligence.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>HPO: Human Phenotype Ontology.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The comparison among Variant Prioritizer, Phevor, and PhenoDigm.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Tool</td>
                <td>Variant Prioritizer</td>
                <td>Phevor</td>
                <td>PhenoDigm</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Algorithm</td>
                <td>Okapi BM25</td>
                <td>Graph algorithm</td>
                <td>OWLSim</td>
              </tr>
              <tr valign="top">
                <td>Phenotype input format</td>
                <td>Plain texts</td>
                <td>HPO<sup>a</sup> terms</td>
                <td>HPO terms</td>
              </tr>
              <tr valign="top">
                <td>Knowledge base</td>
                <td>OMIM<sup>b</sup>, GeneReviews, Entrez Gene and PubTator</td>
                <td>HPO and GO<sup>c</sup></td>
                <td>OMIM (HPO), Sanger-MGP [<xref ref-type="bibr" rid="ref66">66</xref>], MGD [<xref ref-type="bibr" rid="ref67">67</xref>], and ZFIN [<xref ref-type="bibr" rid="ref68">68</xref>]</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>HPO: Human Phenotype Ontology.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>OMIM: Online Mendelian Inheritance in Man.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>GO: Gene Ontology.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Feature Importance</title>
        <p>For interpreting model predictions, we used the feature importance method provided by scikit-learn to identify which feature has the most predictive power. <xref rid="figure9" ref-type="fig">Figure 9</xref> shows the top 20 important features. According to clinical experience, the connection between a variant and phenotype of a patient is a key factor that influences the physician to decide whether to report a variant or not. Similarly, from the figure we can see that the most important feature is the max bm25 score, which refers to the similarity score between the given variant and keywords. Another important factor that influences the reporting decision during clinical analysis is the severity of a variant. The corresponding feature we use is the ACMG score, which is in the second place of feature importance. In turn, the result of feature importance might provide information for physicians or researchers about the features that they can consider when searching for causative variants.</p>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Feature importance.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>External Validation</title>
        <p>We compared the cumulative percentage distribution of ranks from the testing set and the external validation set. The results are shown in <xref rid="figure10" ref-type="fig">Figures 10</xref> and <xref rid="figure11" ref-type="fig">11</xref>. Their percentage values are close to each other in different regions such as top 10 and top 5. The percentage of top 1 rank of the external validation set is even higher than that of the testing set. With this result, we believe that our approach has shown its potential for robust clinical usage.</p>
        <fig id="figure10" position="float">
          <label>Figure 10</label>
          <caption>
            <p>Percentage distribution of ranks.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure11" position="float">
          <label>Figure 11</label>
          <caption>
            <p>Cumulative percentage distribution of ranks.</p>
          </caption>
          <graphic xlink:href="bioinform_v3i1e37701_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The study has several potential limitations. First, we could not find massive data for training and testing. This not only restricts the amount of teaching material for the machine learning model, but also restricts available measurements to evaluate the trained model. Second, the gene-phenotype score used in this study did not have enough power to detect small or moderate associations because it relies on how frequently the gene-phenotype relationship is reported to the databases it utilizes. Finally, the study did not adjust for potential confounders, such as diet and physical activity. This could cause potential bias because the way in which genes are expressed can be impacted by lifestyle of patients.</p>
        <p>Overall, this study could have potential bias resulting from the lack of sufficient data, lack of reported gene-phenotype relationship, and lack of observation of lifestyle. The impact from the first and the second can be reduced if there are more data and reports available in the future. On the other side, the influence of lifestyle and environment remains an issue that needs more dedicated studies.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this research, we proposed a machine learning model, AI Variant Prioritizer, to predict whether a variant is disease causing for patients with rare Mendelian disorders. We have successfully applied sequencing data from WES and free-text phenotypic information of patient’s disease automatically extracted by keyword extraction tools for model training and testing. By interpreting our model, we identified which features of variants are important. Besides, we achieved a satisfactory result on finding the target variant in our testing data set. After testing 108 patients’ WES data, we succeeded in locating the causative variant in the top 10 ranking list in 93.5% (n=101) of the cases, among an average of 741 candidate variants per person after the filtering process. The performance of the model is similar to that of manual analysis by the physicians in the Department of Medical Genetics, NTUH, and it has been used to help NTUH with genetic diagnosis.</p>
        <p>As physicians are busy taking care of their patients almost all the time, the time spent searching for an accurate genetic diagnosis is extremely important. Our AI prediction model can provide a top 10 hit list that contains the causative variant with a high probability of 93.5% (101/108), thus saving them the weeks of time they would otherwise often spend manually searching beyond the top 10 list.</p>
        <p>It is not an easy task to fully interpret the causative variations of a genetic disease. As the precision of the keywords extracted by tools influences the performance of our model, in future work, we will adopt some NLP techniques such as Bidirectional Encoder Representations from Transformers (BERT) to extract keywords more properly. In addition, the AI Variant Prioritizer model has been built to analyze SNVs and small indels from WES data, but we have not dealt with copy number variations (CNVs) yet. CNVs have been recognized as critical genetic variations, which are associated with both common and complex diseases, and thus have a large influence on several Mendelian and somatic genetic disorders. Therefore, we will collect data on CNVs and extend the capability of our system to annotate and filter CNVs. Furthermore, we will enlarge our data set by adding CNVs as our training data to enable the AI Variant Prioritizer model to predict any kind of causative genetic variations. Before implementation of AI Variant Prioritizer, the mean turnaround time of the entire WES pipeline, from DNA extraction to clinical diagnosis, was 5.8 (SD 1.1) days using Variant Prioritizer. However, after implementation of AI Variant Prioritizer, the mean turnaround time was reduced to 4.8 (SD 1.2) days for rapid trio exome sequencing analysis in NTUH.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Allele frequency.</p>
        <media xlink:href="bioinform_v3i1e37701_app1.docx" xlink:title="DOCX File , 16 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Description of features used in this research.</p>
        <media xlink:href="bioinform_v3i1e37701_app2.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Target variants, HPO term, and keywords of test case. HPO: Human Phenotype Ontology.</p>
        <media xlink:href="bioinform_v3i1e37701_app3.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 24 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACMG</term>
          <def>
            <p>American College of Medical Genetics and Genomics</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AMP</term>
          <def>
            <p>Association for Molecular Pathology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CNV</term>
          <def>
            <p>copy number variation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">GBDT</term>
          <def>
            <p>gradient boosting decision tree</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">gnomAD</term>
          <def>
            <p>Genome Aggregation Database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">GO</term>
          <def>
            <p>Gene Ontology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">HGMD</term>
          <def>
            <p>Human Genome Mutation Database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">HIPAA</term>
          <def>
            <p>Health Insurance Portability and Accountability Act</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">HPO</term>
          <def>
            <p>Human Phenotype Ontology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">IRB</term>
          <def>
            <p>institutional review board</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">MViewer</term>
          <def>
            <p>Mutation Viewer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">NGS</term>
          <def>
            <p>next-generation sequencing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">NTUH</term>
          <def>
            <p>National Taiwan University Hospital</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">OMIM</term>
          <def>
            <p>Online Mendelian Inheritance in Man</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">SNV</term>
          <def>
            <p>single-nucleotide variant</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb22">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb23">VCF</term>
          <def>
            <p>variant call format</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb24">VEP</term>
          <def>
            <p>Variant Effect Predictor</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was funded by a grant from the Ministry of Science and Technology (110-2634-F-002-032-) of Taiwan. The analyses presented in this publication are based on the use of study data downloaded from the dbGaP website, under dbGaP accession numbers phs000744.v4.p2, phs001272.v1.p1, phs000971.v2.p1, phs000711.v6.p2, and phs001232.v3.p2. W-LH applied for the data with project name “Variant prioritization for rapid exome analysis of rare genetic disease” (project ID 20911). Data were downloaded from the FTP site of dbGaP after approval.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>Y-SH investigated the model and data feasibility, performed formal analysis, developed the software, visualized data, and wrote the initial manuscript. CH conceived the idea, curated the data, reviewed the manuscript, and advised the software development team. N-CL and W-LH conceived the idea, curated the patient data, and reviewed and edited the draft. Y-CC and I-CL edited, revised, and strengthened the manuscript. HW and Y-LL tested the data performance. FPL supervised the project progress, supported and managed the project, and reviewed the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Behjati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tarpey</surname>
              <given-names>PS</given-names>
            </name>
          </person-group>
          <article-title>What is next generation sequencing?</article-title>
          <source>Arch Dis Child Educ Pract Ed</source>
          <year>2013</year>
          <month>12</month>
          <volume>98</volume>
          <issue>6</issue>
          <fpage>236</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23986538"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/archdischild-2013-304340</pub-id>
          <pub-id pub-id-type="medline">23986538</pub-id>
          <pub-id pub-id-type="pii">archdischild-2013-304340</pub-id>
          <pub-id pub-id-type="pmcid">PMC3841808</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Potter</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Letaw</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Kulkarni</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>CS</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence (AI)-assisted exome reanalysis greatly aids in the identification of new positive cases and reduces analysis time in a clinical diagnostic laboratory</article-title>
          <source>Genet Med</source>
          <year>2022</year>
          <month>01</month>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>192</fpage>
          <lpage>200</lpage>
          <pub-id pub-id-type="doi">10.1016/j.gim.2021.09.007</pub-id>
          <pub-id pub-id-type="medline">34906498</pub-id>
          <pub-id pub-id-type="pii">S1098-3600(21)04134-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sanger</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nicklen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Coulson</surname>
              <given-names>AR</given-names>
            </name>
          </person-group>
          <article-title>DNA sequencing with chain-terminating inhibitors</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>1977</year>
          <month>12</month>
          <volume>74</volume>
          <issue>12</issue>
          <fpage>5463</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/271968"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.74.12.5463</pub-id>
          <pub-id pub-id-type="medline">271968</pub-id>
          <pub-id pub-id-type="pmcid">PMC431765</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hamosh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Amberger</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Valle</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>McKusick</surname>
              <given-names>VA</given-names>
            </name>
          </person-group>
          <article-title>Online Mendelian Inheritance in Man (OMIM)</article-title>
          <source>Hum Mutat</source>
          <year>2000</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>57</fpage>
          <lpage>61</lpage>
          <pub-id pub-id-type="doi">10.1002/(SICI)1098-1004(200001)15:1&lt;57::AID-HUMU12&gt;3.0.CO;2-G</pub-id>
          <pub-id pub-id-type="medline">10612823</pub-id>
          <pub-id pub-id-type="pii">10.1002/(SICI)1098-1004(200001)15:1&lt;57::AID-HUMU12&gt;3.0.CO;2-G</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Adam</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Everman</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Mirzaa</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Pagon</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Bean</surname>
              <given-names>LJH</given-names>
            </name>
            <name name-style="western">
              <surname>Gripp</surname>
              <given-names>KW</given-names>
            </name>
          </person-group>
          <source>GeneReviews</source>
          <year>1993</year>
          <publisher-loc>Seattle, WA</publisher-loc>
          <publisher-name>University of Washington, Seattle</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Faintuch</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Faintuch</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>Precision Medicine for Investigators, Practitioners and Providers</source>
          <year>2019</year>
          <month>11</month>
          <day>16</day>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Academic Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smedley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>JOB</given-names>
            </name>
            <name name-style="western">
              <surname>Jäger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Köhler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Holtgrewe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schubach</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Siragusa</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zemojtel</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Buske</surname>
              <given-names>OJ</given-names>
            </name>
            <name name-style="western">
              <surname>Washington</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Bone</surname>
              <given-names>WP</given-names>
            </name>
            <name name-style="western">
              <surname>Haendel</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Next-generation diagnostics and disease-gene discovery with the Exomiser</article-title>
          <source>Nat Protoc</source>
          <year>2015</year>
          <month>12</month>
          <volume>10</volume>
          <issue>12</issue>
          <fpage>2004</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26562621"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nprot.2015.124</pub-id>
          <pub-id pub-id-type="medline">26562621</pub-id>
          <pub-id pub-id-type="pii">nprot.2015.124</pub-id>
          <pub-id pub-id-type="pmcid">PMC5467691</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boudellioua</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kulmanov</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schofield</surname>
              <given-names>PN</given-names>
            </name>
            <name name-style="western">
              <surname>Gkoutos</surname>
              <given-names>GV</given-names>
            </name>
            <name name-style="western">
              <surname>Hoehndorf</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>DeepPVP: phenotype-based prioritization of causative variants using deep learning</article-title>
          <source>BMC Bioinformatics</source>
          <year>2019</year>
          <month>02</month>
          <day>06</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>65</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-2633-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12859-019-2633-8</pub-id>
          <pub-id pub-id-type="medline">30727941</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12859-019-2633-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6364462</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bustamante</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>WH</given-names>
            </name>
          </person-group>
          <article-title>Xrare: a machine learning method jointly modeling phenotypes and genetic evidence for rare disease diagnosis</article-title>
          <source>Genet Med</source>
          <year>2019</year>
          <month>09</month>
          <volume>21</volume>
          <issue>9</issue>
          <fpage>2126</fpage>
          <lpage>2134</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30675030"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41436-019-0439-8</pub-id>
          <pub-id pub-id-type="medline">30675030</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41436-019-0439-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6752318</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holt</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Wilk</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Birch</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Gajapathy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moss</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Sosonkina</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wilk</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Shaterferdosian</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Uno-Antonison</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Weborg</surname>
              <given-names>A</given-names>
            </name>
            <collab>Undiagnosed Diseases Network</collab>
            <name name-style="western">
              <surname>Worthey</surname>
              <given-names>EA</given-names>
            </name>
          </person-group>
          <article-title>VarSight: prioritizing clinically reported variants with binary classification algorithms</article-title>
          <source>BMC Bioinformatics</source>
          <year>2019</year>
          <month>10</month>
          <day>15</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>496</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3026-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12859-019-3026-8</pub-id>
          <pub-id pub-id-type="medline">31615419</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12859-019-3026-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6792253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>PN</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Phenolyzer: phenotype-based prioritization of candidate genes for human diseases</article-title>
          <source>Nat Methods</source>
          <year>2015</year>
          <month>09</month>
          <volume>12</volume>
          <issue>9</issue>
          <fpage>841</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26192085"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nmeth.3484</pub-id>
          <pub-id pub-id-type="medline">26192085</pub-id>
          <pub-id pub-id-type="pii">nmeth.3484</pub-id>
          <pub-id pub-id-type="pmcid">PMC4718403</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De La Vega</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Chowdhury</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Frise</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McCarthy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Guidugli</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Genetti</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Beggs</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Löscher</surname>
              <given-names>Britt-Sabina</given-names>
            </name>
            <name name-style="western">
              <surname>Franke</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Boone</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Õunap</surname>
              <given-names>Katrin</given-names>
            </name>
            <name name-style="western">
              <surname>Pajusalu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huentelman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Naymik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Narayanan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Veeraraghavan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Billings</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Reese</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Yandell</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kingsmore</surname>
              <given-names>SF</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence enables comprehensive genome interpretation and nomination of candidate diagnoses for rare genetic diseases</article-title>
          <source>Genome Med</source>
          <year>2021</year>
          <month>10</month>
          <day>14</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>153</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00965-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13073-021-00965-0</pub-id>
          <pub-id pub-id-type="medline">34645491</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13073-021-00965-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC8515723</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rentzsch</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Witten</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shendure</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kircher</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>CADD: predicting the deleteriousness of variants throughout the human genome</article-title>
          <source>Nucleic Acids Res</source>
          <year>2019</year>
          <month>01</month>
          <day>08</day>
          <volume>47</volume>
          <issue>D1</issue>
          <fpage>D886</fpage>
          <lpage>D894</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30371827"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gky1016</pub-id>
          <pub-id pub-id-type="medline">30371827</pub-id>
          <pub-id pub-id-type="pii">5146191</pub-id>
          <pub-id pub-id-type="pmcid">PMC6323892</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jian</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbs</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Boerwinkle</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Comparison and integration of deleteriousness prediction methods for nonsynonymous SNVs in whole exome sequencing studies</article-title>
          <source>Hum Mol Genet</source>
          <year>2015</year>
          <month>05</month>
          <day>15</day>
          <volume>24</volume>
          <issue>8</issue>
          <fpage>2125</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25552646"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/hmg/ddu733</pub-id>
          <pub-id pub-id-type="medline">25552646</pub-id>
          <pub-id pub-id-type="pii">ddu733</pub-id>
          <pub-id pub-id-type="pmcid">PMC4375422</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singleton</surname>
              <given-names>MV</given-names>
            </name>
            <name name-style="western">
              <surname>Guthery</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Voelkerding</surname>
              <given-names>KV</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Margraf</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Durtschi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Eilbeck</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Reese</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Jorde</surname>
              <given-names>LB</given-names>
            </name>
            <name name-style="western">
              <surname>Huff</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Yandell</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Phevor combines multiple biomedical ontologies for accurate identification of disease-causing alleles in single individuals and small nuclear families</article-title>
          <source>Am J Hum Genet</source>
          <year>2014</year>
          <month>04</month>
          <day>03</day>
          <volume>94</volume>
          <issue>4</issue>
          <fpage>599</fpage>
          <lpage>610</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0002-9297(14)00112-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ajhg.2014.03.010</pub-id>
          <pub-id pub-id-type="medline">24702956</pub-id>
          <pub-id pub-id-type="pii">S0002-9297(14)00112-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC3980410</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adzhubei</surname>
              <given-names>IA</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peshkin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ramensky</surname>
              <given-names>VE</given-names>
            </name>
            <name name-style="western">
              <surname>Gerasimova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bork</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kondrashov</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Sunyaev</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>A method and server for predicting damaging missense mutations</article-title>
          <source>Nat Methods</source>
          <year>2010</year>
          <month>04</month>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>248</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20354512"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nmeth0410-248</pub-id>
          <pub-id pub-id-type="medline">20354512</pub-id>
          <pub-id pub-id-type="pii">nmeth0410-248</pub-id>
          <pub-id pub-id-type="pmcid">PMC2855889</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Henikoff</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>SIFT: Predicting amino acid changes that affect protein function</article-title>
          <source>Nucleic Acids Res</source>
          <year>2003</year>
          <month>07</month>
          <day>01</day>
          <volume>31</volume>
          <issue>13</issue>
          <fpage>3812</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/12824425"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkg509</pub-id>
          <pub-id pub-id-type="medline">12824425</pub-id>
          <pub-id pub-id-type="pmcid">PMC168916</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwarz</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Schuelke</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Seelow</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>MutationTaster2: mutation prediction for the deep-sequencing age</article-title>
          <source>Nat Methods</source>
          <year>2014</year>
          <month>04</month>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>361</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1038/nmeth.2890</pub-id>
          <pub-id pub-id-type="medline">24681721</pub-id>
          <pub-id pub-id-type="pii">nmeth.2890</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>PN</given-names>
            </name>
            <name name-style="western">
              <surname>Köhler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Seelow</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Horn</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mundlos</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The Human Phenotype Ontology: a tool for annotating and analyzing human hereditary disease</article-title>
          <source>Am J Hum Genet</source>
          <year>2008</year>
          <month>11</month>
          <volume>83</volume>
          <issue>5</issue>
          <fpage>610</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://linkinghub.elsevier.com/retrieve/pii/S0002-9297(08)00535-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ajhg.2008.09.017</pub-id>
          <pub-id pub-id-type="medline">18950739</pub-id>
          <pub-id pub-id-type="pii">S0002-9297(08)00535-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC2668030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>An integrated genetic variation analysis system for gene diagnostics in precision medicine (Master's thesis)</article-title>
          <source>NDLTD</source>
          <year>2018</year>
          <access-date>2022-08-31</access-date>
          <publisher-loc>Taipei City, Taiwan</publisher-loc>
          <publisher-name>National Taiwan University</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hdl.handle.net/11296/v9rcd8">https://hdl.handle.net/11296/v9rcd8</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T-F</given-names>
            </name>
          </person-group>
          <article-title>Variants Prioritizer for Exome Data Based on Text-mining</article-title>
          <source>NTU Thesis and Dissertations Repository</source>
          <year>2018</year>
          <access-date>2022-08-31</access-date>
          <publisher-loc>Taipei City, Taiwan</publisher-loc>
          <publisher-name>National Taiwan University</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tdr.lib.ntu.edu.tw/handle/123456789/17687?mode=full">https://tdr.lib.ntu.edu.tw/handle/123456789/17687?mode=full</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hakonarson</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>ANNOVAR: functional annotation of genetic variants from high-throughput sequencing data</article-title>
          <source>Nucleic Acids Res</source>
          <year>2010</year>
          <month>09</month>
          <volume>38</volume>
          <issue>16</issue>
          <fpage>e164</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20601685"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkq603</pub-id>
          <pub-id pub-id-type="medline">20601685</pub-id>
          <pub-id pub-id-type="pii">gkq603</pub-id>
          <pub-id pub-id-type="pmcid">PMC2938201</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McLaren</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gil</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hunt</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Riat</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Ritchie</surname>
              <given-names>GRS</given-names>
            </name>
            <name name-style="western">
              <surname>Thormann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Flicek</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>The Ensembl Variant Effect Predictor</article-title>
          <source>Genome Biol</source>
          <year>2016</year>
          <month>06</month>
          <day>06</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>122</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0974-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13059-016-0974-4</pub-id>
          <pub-id pub-id-type="medline">27268795</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13059-016-0974-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4893825</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stromberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lajugie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Margulies</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Nirvana: Clinical Grade Variant Annotator</article-title>
          <year>2017</year>
          <conf-name>The 8th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics</conf-name>
          <conf-date>August 20-23, 2017</conf-date>
          <conf-loc>Boston, MA</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <pub-id pub-id-type="doi">10.1145/3107411.3108204</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>InterVar: Clinical Interpretation of Genetic Variants by the 2015 ACMG-AMP Guidelines</article-title>
          <source>Am J Hum Genet</source>
          <year>2017</year>
          <month>02</month>
          <day>02</day>
          <volume>100</volume>
          <issue>2</issue>
          <fpage>267</fpage>
          <lpage>280</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0002-9297(17)30004-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ajhg.2017.01.004</pub-id>
          <pub-id pub-id-type="medline">28132688</pub-id>
          <pub-id pub-id-type="pii">S0002-9297(17)30004-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC5294755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landrum</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rubinstein</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Church</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Maglott</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>ClinVar: public archive of relationships among sequence variation and human phenotype</article-title>
          <source>Nucleic Acids Res</source>
          <year>2014</year>
          <month>01</month>
          <volume>42</volume>
          <issue>Database issue</issue>
          <fpage>D980</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://nar.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=24234437"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkt1113</pub-id>
          <pub-id pub-id-type="medline">24234437</pub-id>
          <pub-id pub-id-type="pii">gkt1113</pub-id>
          <pub-id pub-id-type="pmcid">PMC3965032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stenson</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Ball</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Mort</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Shiel</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>NST</given-names>
            </name>
            <name name-style="western">
              <surname>Abeysinghe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Krawczak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>DN</given-names>
            </name>
          </person-group>
          <article-title>Human Gene Mutation Database (HGMD): 2003 update</article-title>
          <source>Hum Mutat</source>
          <year>2003</year>
          <month>07</month>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>577</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1002/humu.10212</pub-id>
          <pub-id pub-id-type="medline">12754702</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Taiwan Biobank: a project aiming to aid Taiwan's transition into a biomedical island</article-title>
          <source>Pharmacogenomics</source>
          <year>2008</year>
          <month>02</month>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>235</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.2217/14622416.9.2.235</pub-id>
          <pub-id pub-id-type="medline">18370851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aziz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bale</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bick</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gastier-Foster</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grody</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Hegde</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lyon</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Spector</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Voelkerding</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rehm</surname>
              <given-names>HL</given-names>
            </name>
            <collab>ACMG Laboratory Quality Assurance Committee</collab>
          </person-group>
          <article-title>Standards and guidelines for the interpretation of sequence variants: a joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology</article-title>
          <source>Genet Med</source>
          <year>2015</year>
          <month>05</month>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>405</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25741868"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/gim.2015.30</pub-id>
          <pub-id pub-id-type="medline">25741868</pub-id>
          <pub-id pub-id-type="pii">gim201530</pub-id>
          <pub-id pub-id-type="pmcid">PMC4544753</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Glassberg</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Harpak</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lant</surname>
              <surname>Lan</surname>
            </name>
            <name name-style="western">
              <surname>Pritchard</surname>
              <given-names>JK</given-names>
            </name>
          </person-group>
          <article-title>Measurement of selective constraint on human gene expression</article-title>
          <source>bioRxiv</source>
          <comment>2022</comment>
          <pub-id pub-id-type="doi">10.1101/345801</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
          </person-group>
          <article-title>Effective mapping of biomedical text to the UMLS Metathesaurus: the MetaMap program</article-title>
          <source>Proc AMIA Symp</source>
          <year>2001</year>
          <fpage>17</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/11825149"/>
          </comment>
          <pub-id pub-id-type="medline">11825149</pub-id>
          <pub-id pub-id-type="pii">D010001275</pub-id>
          <pub-id pub-id-type="pmcid">PMC2243666</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Lang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>An overview of MetaMap: historical perspective and recent advances</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>229</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20442139"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.002733</pub-id>
          <pub-id pub-id-type="medline">20442139</pub-id>
          <pub-id pub-id-type="pii">17/3/229</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lindberg</surname>
              <given-names>DAB</given-names>
            </name>
            <name name-style="western">
              <surname>Humphreys</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>McCray</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>The Unified Medical Language System</article-title>
          <source>Yearb Med Inform</source>
          <year>2018</year>
          <month>03</month>
          <day>05</day>
          <volume>02</volume>
          <issue>01</issue>
          <fpage>41</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1055/s-0038-1637976</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Peres Kury</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ta</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Doc2Hpo: a web application for efficient and accurate HPO concept curation</article-title>
          <source>Nucleic Acids Res</source>
          <year>2019</year>
          <month>07</month>
          <day>02</day>
          <volume>47</volume>
          <issue>W1</issue>
          <fpage>W566</fpage>
          <lpage>W570</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31106327"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkz386</pub-id>
          <pub-id pub-id-type="medline">31106327</pub-id>
          <pub-id pub-id-type="pii">5491745</pub-id>
          <pub-id pub-id-type="pmcid">PMC6602487</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tchechmedjiev</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abdaoui</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Emonet</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Melzi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jonnagaddala</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jonquet</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Enhanced functionalities for annotating and indexing clinical text with the NCBO Annotator</article-title>
          <source>Bioinformatics</source>
          <year>2018</year>
          <month>06</month>
          <day>01</day>
          <volume>34</volume>
          <issue>11</issue>
          <fpage>1962</fpage>
          <lpage>1965</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29846492"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/bty009</pub-id>
          <pub-id pub-id-type="medline">29846492</pub-id>
          <pub-id pub-id-type="pii">4802221</pub-id>
          <pub-id pub-id-type="pmcid">PMC5972606</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>IDF revisited: A simple new derivation within the Robertson-Spärck Jones probabilistic model</article-title>
          <year>2007</year>
          <month>07</month>
          <conf-name>SIGIR '07: Proceedings of the 30th annual international ACM SIGIR conference on Research and development in information retrieval</conf-name>
          <conf-date>July 23-27, 2007</conf-date>
          <conf-loc>Amsterdam, The Netherlands</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>ACM</publisher-name>
          <fpage>751</fpage>
          <lpage>752</lpage>
          <pub-id pub-id-type="doi">10.1145/1277741.1277891</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Beaulieu</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Okapi at TREC-7: automatic ad hoc, filtering, VCL and interactive track</article-title>
          <source>Microsoft</source>
          <year>1999</year>
          <month>01</month>
          <access-date>2022-08-31</access-date>
          <publisher-loc>Gaithersburg, MD</publisher-loc>
          <publisher-name>National Institute of Standards and Technology</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/okapi_trec7.pdf">https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/okapi_trec7.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maglott</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ostell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pruitt</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Tatusova</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Entrez Gene: gene-centered information at NCBI</article-title>
          <source>Nucleic Acids Res</source>
          <year>2005</year>
          <month>01</month>
          <day>01</day>
          <volume>33</volume>
          <issue>Database issue</issue>
          <fpage>D54</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/15608257"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gki031</pub-id>
          <pub-id pub-id-type="medline">15608257</pub-id>
          <pub-id pub-id-type="pii">33/suppl_1/D54</pub-id>
          <pub-id pub-id-type="pmcid">PMC539985</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>PubTator: a web-based text mining tool for assisting biocuration</article-title>
          <source>Nucleic Acids Res</source>
          <year>2013</year>
          <month>07</month>
          <volume>41</volume>
          <issue>Web Server issue</issue>
          <fpage>W518</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://nar.oxfordjournals.org/cgi/pmidlookup?view=long&amp;pmid=23703206"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkt441</pub-id>
          <pub-id pub-id-type="medline">23703206</pub-id>
          <pub-id pub-id-type="pii">gkt441</pub-id>
          <pub-id pub-id-type="pmcid">PMC3692066</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hintzsche</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>AC</given-names>
            </name>
          </person-group>
          <article-title>A Survey of Computational Tools to Analyze and Interpret Whole Exome Sequencing Data</article-title>
          <source>Int J Genomics</source>
          <year>2016</year>
          <volume>2016</volume>
          <fpage>7983236</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2016/7983236"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2016/7983236</pub-id>
          <pub-id pub-id-type="medline">28070503</pub-id>
          <pub-id pub-id-type="pmcid">PMC5192301</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doniger</surname>
              <given-names>SW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Swain</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Corcuera</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fay</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>A catalog of neutral and deleterious polymorphism in yeast</article-title>
          <source>PLoS Genet</source>
          <year>2008</year>
          <month>08</month>
          <day>29</day>
          <volume>4</volume>
          <issue>8</issue>
          <fpage>e1000183</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pgen.1000183"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pgen.1000183</pub-id>
          <pub-id pub-id-type="medline">18769710</pub-id>
          <pub-id pub-id-type="pmcid">PMC2515631</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reva</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Antipin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sander</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Predicting the functional impact of protein mutations: application to cancer genomics</article-title>
          <source>Nucleic Acids Res</source>
          <year>2011</year>
          <month>09</month>
          <day>01</day>
          <volume>39</volume>
          <issue>17</issue>
          <fpage>e118</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21727090"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkr407</pub-id>
          <pub-id pub-id-type="medline">21727090</pub-id>
          <pub-id pub-id-type="pii">gkr407</pub-id>
          <pub-id pub-id-type="pmcid">PMC3177186</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shihab</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Gough</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Stenson</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Barker</surname>
              <given-names>GLA</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>INM</given-names>
            </name>
            <name name-style="western">
              <surname>Gaunt</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>Predicting the functional, molecular, and phenotypic consequences of amino acid substitutions using hidden Markov models</article-title>
          <source>Hum Mutat</source>
          <year>2013</year>
          <month>01</month>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>57</fpage>
          <lpage>65</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/humu.22225"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/humu.22225</pub-id>
          <pub-id pub-id-type="medline">23033316</pub-id>
          <pub-id pub-id-type="pmcid">PMC3558800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sims</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>AP</given-names>
            </name>
          </person-group>
          <article-title>Predicting the functional effect of amino acid substitutions and indels</article-title>
          <source>PLoS One</source>
          <year>2012</year>
          <volume>7</volume>
          <issue>10</issue>
          <fpage>e46688</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0046688"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0046688</pub-id>
          <pub-id pub-id-type="medline">23056405</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-10334</pub-id>
          <pub-id pub-id-type="pmcid">PMC3466303</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jagadeesh</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Wenger</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Guturu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stenson</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Bernstein</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Bejerano</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>M-CAP eliminates a majority of variants of uncertain significance in clinical exomes at high sensitivity</article-title>
          <source>Nat Genet</source>
          <year>2016</year>
          <month>10</month>
          <day>24</day>
          <volume>48</volume>
          <issue>12</issue>
          <fpage>1581</fpage>
          <lpage>1586</lpage>
          <pub-id pub-id-type="doi">10.1038/ng.3703</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davydov</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Goode</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Sirota</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Sidow</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Batzoglou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Identifying a high fraction of the human genome to be under selective constraint using GERP++</article-title>
          <source>PLoS Comput Biol</source>
          <year>2010</year>
          <month>12</month>
          <day>02</day>
          <volume>6</volume>
          <issue>12</issue>
          <fpage>e1001025</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1001025"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1001025</pub-id>
          <pub-id pub-id-type="medline">21152010</pub-id>
          <pub-id pub-id-type="pmcid">PMC2996323</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>DANN: a deep learning approach for annotating the pathogenicity of genetic variants</article-title>
          <source>Bioinformatics</source>
          <year>2015</year>
          <month>03</month>
          <day>01</day>
          <volume>31</volume>
          <issue>5</issue>
          <fpage>761</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25338716"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btu703</pub-id>
          <pub-id pub-id-type="medline">25338716</pub-id>
          <pub-id pub-id-type="pii">btu703</pub-id>
          <pub-id pub-id-type="pmcid">PMC4341060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shihab</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Gough</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mort</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Day</surname>
              <given-names>INM</given-names>
            </name>
            <name name-style="western">
              <surname>Gaunt</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>An integrative approach to predicting the functional effects of non-coding and coding sequence variation</article-title>
          <source>Bioinformatics</source>
          <year>2015</year>
          <month>05</month>
          <day>15</day>
          <volume>31</volume>
          <issue>10</issue>
          <fpage>1536</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25583119"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btv009</pub-id>
          <pub-id pub-id-type="medline">25583119</pub-id>
          <pub-id pub-id-type="pii">btv009</pub-id>
          <pub-id pub-id-type="pmcid">PMC4426838</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A statistical framework to predict functional non-coding regions in the human genome through integrated analysis of annotation data</article-title>
          <source>Sci Rep</source>
          <year>2015</year>
          <month>05</month>
          <day>27</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>10576</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/srep10576"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep10576</pub-id>
          <pub-id pub-id-type="medline">26015273</pub-id>
          <pub-id pub-id-type="pii">srep10576</pub-id>
          <pub-id pub-id-type="pmcid">PMC4444969</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gulko</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hubisz</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gronau</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Siepel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A method for calculating probabilities of fitness consequences for point mutations across the human genome</article-title>
          <source>Nat Genet</source>
          <year>2015</year>
          <month>03</month>
          <volume>47</volume>
          <issue>3</issue>
          <fpage>276</fpage>
          <lpage>83</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25599402"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/ng.3196</pub-id>
          <pub-id pub-id-type="medline">25599402</pub-id>
          <pub-id pub-id-type="pii">ng.3196</pub-id>
          <pub-id pub-id-type="pmcid">PMC4342276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hubisz</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Siepel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>PHAST and RPHAST: phylogenetic analysis with space/time models</article-title>
          <source>Brief Bioinform</source>
          <year>2011</year>
          <month>01</month>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>41</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21278375"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbq072</pub-id>
          <pub-id pub-id-type="medline">21278375</pub-id>
          <pub-id pub-id-type="pii">bbq072</pub-id>
          <pub-id pub-id-type="pmcid">PMC3030812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siepel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bejerano</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pedersen</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hinrichs</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Clawson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Spieth</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hillier</surname>
              <given-names>LW</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weinstock</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbs</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Haussler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes</article-title>
          <source>Genome Res</source>
          <year>2005</year>
          <month>08</month>
          <day>15</day>
          <volume>15</volume>
          <issue>8</issue>
          <fpage>1034</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://genome.cshlp.org/cgi/pmidlookup?view=long&amp;pmid=16024819"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/gr.3715005</pub-id>
          <pub-id pub-id-type="medline">16024819</pub-id>
          <pub-id pub-id-type="pii">gr.3715005</pub-id>
          <pub-id pub-id-type="pmcid">PMC1182216</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garber</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guttman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Clamp</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zody</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Identifying novel constrained elements by exploiting biased substitution patterns</article-title>
          <source>Bioinformatics</source>
          <year>2009</year>
          <month>07</month>
          <day>15</day>
          <volume>25</volume>
          <issue>12</issue>
          <fpage>i54</fpage>
          <lpage>62</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19478016"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btp190</pub-id>
          <pub-id pub-id-type="medline">19478016</pub-id>
          <pub-id pub-id-type="pii">btp190</pub-id>
          <pub-id pub-id-type="pmcid">PMC2687944</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Boerwinkle</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>dbNSFP v3.0: A One-Stop Database of Functional Predictions and Annotations for Human Nonsynonymous and Splice-Site SNVs</article-title>
          <source>Hum Mutat</source>
          <year>2016</year>
          <month>03</month>
          <volume>37</volume>
          <issue>3</issue>
          <fpage>235</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26555599"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/humu.22932</pub-id>
          <pub-id pub-id-type="medline">26555599</pub-id>
          <pub-id pub-id-type="pmcid">PMC4752381</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/pdf/10.5555/1953048.2078195"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kraskov</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stögbauer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Grassberger</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Estimating mutual information</article-title>
          <source>Phys Rev E</source>
          <year>2004</year>
          <month>06</month>
          <day>23</day>
          <volume>69</volume>
          <issue>6</issue>
          <fpage>066138-1</fpage>
          <lpage>066138-16</lpage>
          <pub-id pub-id-type="doi">10.1103/physreve.69.066138</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Mutual information between discrete and continuous data sets</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <month>02</month>
          <day>19</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>e87357</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0087357"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0087357</pub-id>
          <pub-id pub-id-type="medline">24586270</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-49753</pub-id>
          <pub-id pub-id-type="pmcid">PMC3929353</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Machine Learning</source>
          <year>2001</year>
          <volume>45</volume>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/content/pdf/10.1023/A:1010933404324.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Consistency for a simple model of random forests</article-title>
          <source>University of California, Berkeley</source>
          <year>2004</year>
          <access-date>2022-08-31</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.stat.berkeley.edu/~breiman/RandomForests/consistencyRFA.pdf">https://www.stat.berkeley.edu/~breiman/RandomForests/consistencyRFA.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ellerman</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Logical Entropy: Introduction to Classical and Quantum Logical Information Theory</article-title>
          <source>Entropy (Basel)</source>
          <year>2018</year>
          <month>09</month>
          <day>06</day>
          <volume>20</volume>
          <issue>9</issue>
          <fpage>679</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=e20090679"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/e20090679</pub-id>
          <pub-id pub-id-type="medline">33265768</pub-id>
          <pub-id pub-id-type="pii">e20090679</pub-id>
          <pub-id pub-id-type="pmcid">PMC7513204</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birgmeier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Haeussler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deisseroth</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Jagadeesh</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Ratner</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Guturu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wenger</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Stenson</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Ré</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bernstein</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Bejerano</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>AMELIE accelerates Mendelian patient diagnosis directly from the primary literature</article-title>
          <source>bioRxiv</source>
          <year>2017</year>
          <comment>Preprint posted online on August 02, 2017</comment>
          <pub-id pub-id-type="doi">10.1101/171322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stelzer</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Plaschkes</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Oz-Levi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Alkelai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Olender</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Twik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Belinky</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Fishilevich</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nudel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Guan-Golan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Warshawsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dahary</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kohn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mazor</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Iny Stein</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Baris</surname>
              <given-names>HN</given-names>
            </name>
            <name name-style="western">
              <surname>Rappaport</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Safran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lancet</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>VarElect: the phenotype-based variation prioritizer of the GeneCards Suite</article-title>
          <source>BMC Genomics</source>
          <year>2016</year>
          <month>06</month>
          <day>23</day>
          <volume>17 Suppl 2</volume>
          <issue>S2</issue>
          <fpage>444</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-016-2722-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12864-016-2722-2</pub-id>
          <pub-id pub-id-type="medline">27357693</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12864-016-2722-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4928145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Hildreth</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Batalov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chowdhury</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Watkins</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ellsworth</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Camp</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kint</surname>
              <given-names>CI</given-names>
            </name>
            <name name-style="western">
              <surname>Yacoubian</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Farnaes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bainbridge</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Beebe</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>JJA</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cakici</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Caylor</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Creed</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Frith</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gain</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gaughran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gilmer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gleeson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gore</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grunenwald</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hovey</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Janes</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>McDonagh</surname>
              <given-names>PD</given-names>
            </name>
            <name name-style="western">
              <surname>McBride</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mulrooney</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nahas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Oriol</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Puckett</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rady</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Reese</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sanford</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sweeney</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tokita</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Der Kraan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wigby</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Yamada</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schols</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Reynders</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dimmock</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Veeraraghavan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Defay</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kingsmore</surname>
              <given-names>SF</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis of genetic diseases in seriously ill children by rapid whole-genome sequencing and automated phenotyping and interpretation</article-title>
          <source>Sci Transl Med</source>
          <year>2019</year>
          <month>04</month>
          <day>24</day>
          <volume>11</volume>
          <issue>489</issue>
          <fpage>eaat6177</fpage>
          <pub-id pub-id-type="doi">10.1126/scitranslmed.aat6177</pub-id>
          <pub-id pub-id-type="medline">31019026</pub-id>
          <pub-id pub-id-type="pii">11/489/eaat6177</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smedley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Oellrich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Köhler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ruef</surname>
              <given-names>B</given-names>
            </name>
            <collab>Sanger Mouse Genetics Project</collab>
            <name name-style="western">
              <surname>Westerfield</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mungall</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>PhenoDigm: analyzing curated annotations to associate animal models with human diseases</article-title>
          <source>Database (Oxford)</source>
          <year>2013</year>
          <volume>2013</volume>
          <fpage>bat025</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bat025"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bat025</pub-id>
          <pub-id pub-id-type="medline">23660285</pub-id>
          <pub-id pub-id-type="pii">bat025</pub-id>
          <pub-id pub-id-type="pmcid">PMC3649640</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Washington</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Haendel</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Mungall</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ashburner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Westerfield</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>Linking human diseases to animal models using ontology-based phenotype annotation</article-title>
          <source>PLoS Biol</source>
          <year>2009</year>
          <month>11</month>
          <day>24</day>
          <volume>7</volume>
          <issue>11</issue>
          <fpage>e1000247</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pbio.1000247"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pbio.1000247</pub-id>
          <pub-id pub-id-type="medline">19956802</pub-id>
          <pub-id pub-id-type="pmcid">PMC2774506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ayadi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Birling</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bottomley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bussell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fuchs</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fray</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gailus-Durner</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Greenaway</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Houghton</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Karp</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Leblanc</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lengger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Maier</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mallon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marschall</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Melvin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlovic</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Skarnes</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Selloum</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ramirez-Solis</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sorg</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Teboul</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Vasseur</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Walling</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weaver</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Bradley</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Steel</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Hrabě de Angelis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Herault</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Mouse large-scale phenotyping initiatives: overview of the European Mouse Disease Clinic (EUMODIC) and of the Wellcome Trust Sanger Institute Mouse Genetics Project</article-title>
          <source>Mamm Genome</source>
          <year>2012</year>
          <month>10</month>
          <volume>23</volume>
          <issue>9-10</issue>
          <fpage>600</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22961258"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00335-012-9418-y</pub-id>
          <pub-id pub-id-type="medline">22961258</pub-id>
          <pub-id pub-id-type="pmcid">PMC3463797</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bult</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Eppig</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Blake</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Kadin</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>JE</given-names>
            </name>
            <collab>Mouse Genome Database Group</collab>
          </person-group>
          <article-title>The mouse genome database: genotypes, phenotypes, and models of human disease</article-title>
          <source>Nucleic Acids Res</source>
          <year>2013</year>
          <month>01</month>
          <volume>41</volume>
          <issue>Database issue</issue>
          <fpage>D885</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23175610"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gks1115</pub-id>
          <pub-id pub-id-type="medline">23175610</pub-id>
          <pub-id pub-id-type="pii">gks1115</pub-id>
          <pub-id pub-id-type="pmcid">PMC3531104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Howe</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Bradford</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Conlin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Eagle</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Fashena</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Frazer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Knight</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mani</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Moxon</surname>
              <given-names>SAT</given-names>
            </name>
            <name name-style="western">
              <surname>Paddock</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ramachandran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ruef</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ruzicka</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schaper</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sprunger</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Van Slyke</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Westerfield</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ZFIN, the Zebrafish Model Organism Database: increased support for mutants and transgenics</article-title>
          <source>Nucleic Acids Res</source>
          <year>2013</year>
          <month>01</month>
          <volume>41</volume>
          <issue>Database issue</issue>
          <fpage>D854</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23074187"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gks938</pub-id>
          <pub-id pub-id-type="medline">23074187</pub-id>
          <pub-id pub-id-type="pii">gks938</pub-id>
          <pub-id pub-id-type="pmcid">PMC3531097</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
