# CeLLaTe / app.py
# Entry point of the CeLLaTe biomedical NER demo space.
# Last change: "Update app.py" by Mardiyyah (commit 8ff36ae, verified).
from transformers import pipeline
import gradio as gr
from flair.data import Sentence
from flair.models import SequenceTagger
import torch
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Example input 1: a passage about HeLa cells used as an in vitro infection
# model. Offered to the user through gr.Examples in gradio_ui().
# The literal starts and ends with a newline because of the triple-quote layout.
GR_TXT1 = """
Aside from in vivo models, numerous studies investigating bacterial virulence and pathogenesis have also
employed in vitro cell line models to gain an initial understanding of the intricate host-pathogen interactions.
These studies, which are simpler and more cost-effective than those using in vivo models,
serve as the foundation for many in vivo studies by providing additional data to support any conclusions [12].
Epithelial mucous membrane cells are the primary focus of most in vitro investigations due to them being usually the initial point of contact for infections [12,13].
HeLa cells, which originate from human cervical epithelial cells, are thus frequently selected for bacterial adhesion and invasion and are particularly suitable for experiments [14].
A. baumannii frequently infects human epithelial tissues, such as the respiratory system, skin and mucosal linings [15].
HeLa cells are resilient and readily cultured in vitro, exhibiting a rapid growth rate.
This ensures the availability of a uniform and consistent cell population for studies, rendering them economical and reliable.
"""
# Example input 2: a passage about IGF2 expression in GIST cell lines.
# Offered to the user through gr.Examples in gradio_ui().
# NOTE(review): the doubled figure references (e.g. "Figure1A).1A)") look like
# copy/extraction artifacts from the source article and include an invisible
# character after "Figure " -- confirm whether they should be cleaned up.
GR_TXT2 = """
Based on the limma R package, a total of 2578 (DEGs 1398 downregulated and 1188 upregulated) were screened out from GEO: GSE225819 data,
including 20 normal samples and 20 GIST samples with liver metastasis (|log2FC| > 1; P < 0.05), suggesting that these DEGs may be involved in liver metastasis in GIST patients (Figure ​(Figure1A).1A).
The top 10 upregulated genes were PENK, IGF2, GPR20, CTSL, SCRG1, PNMAL1, NKX3-2, ANO1, PLAT, and BCHE.
The top 10 downregulated genes were ATP4B, GKN1, MT1G, GKN2, ATP4A, SPINK1, TSPAN8, TFF1, KCNE2, and REG1A (Supplementary Table 1).
Based on the Deseq2, 1386 DEGs (939 downregulated and 447 upregulated) were screened out in GSE155880, including seven Imatinib-sensitive samples and seven imatinib-resistant GIST patients (|log2FC| > 1; P < 0.05, Figure ​Figure1B).1B).
The intersection of the two analyses indicated that only IGF2 was involved in the drug resistance regulation and GIST metastasis in these DEGs (Supplementary Table 2).
Moreover, we evaluated IGF2 expression in the GIST cell line. By western blotting, expression levels of IGF2 in GIST882, GIST882-R, GIST-T1, and GIST-T1-R were higher than those in normal RGM-1.
Furthermore, IGF2 was significantly over expressed in GIST882-R/GIST-T1-R compared with other cell lines GIST882/GIST-T1 (P < 0.01, P < 0.001; Figure ​Figure1C).1C).
In addition, the expression levels of IGF2 in culture supernatants were measured using ELISA and compared (Figure ​(Figure1D).1D).
We found that the ELISA and western blot results (P < 0.05, P < 0.001) were similar.
IGF2 expression was high in drug-resistant GIST cell lines, suggesting that IGF2 overexpression may be closely related to drug resistance.
"""
# Introductory text and usage instructions, rendered with gr.Markdown in
# gradio_ui(). Written in Markdown (heading, bold, bullet and numbered lists).
GR_MARKDOWN_TXT = """
# 🧬 OTAR3088 Biomedical NER Demo
Welcome to the **OTAR3088 Entity Extraction for Knowledge discovery Project demo**.
This space showcases models trained to recognize the following biomedical entities:
- **CellLine**
- **CellType**
- **Tissue**
These entities are collectively referred to as **"CeLLaTe"**.
👉 Try it out:
1. Select a model from the dropdown menu.
2. Enter/paste your text into the input box, or use our provided example biomedical paragraphs.
3. View the extracted entities highlighted directly in **"Tagged Entities"** box.
**Note📢:** Models in this demo are continuously updated and improved as part of our ongoing research.
"""
# Colour theme passed to gr.Blocks in gradio_ui().
GR_THEME = gr.themes.Soft(
primary_hue="indigo",
secondary_hue="rose",
neutral_hue="gray"
)
# Maps the display name shown in the model dropdown to the model identifier
# handed to the loader in load_model() (these look like Hugging Face Hub repo
# ids -- confirm). Names starting with "flair" (case-insensitive) are loaded
# with Flair's SequenceTagger; every other name goes through the transformers
# "ner" pipeline.
MODEL_REGISTRY = {
"CeLLaTe-V2-Model": "OTAR3088/bioformer-CeLLaTe_V2",
"CellFinder-V1-Model": "OTAR3088/bioformer-cellfinder_V1",
"CeLLaTe-V1-Model": "OTAR3088/bioformer-cellate_V1",
"Flair-CeLLaTe-Model": "OTAR3088/flair-microsoft-cellate_cellfinder-V1"
}
# Per-process caches filled lazily by load_model(), keyed by display name,
# so each model is loaded at most once.
hf_pipes = {}
flair_pipes = {}
def load_model(model_name):
    """Return the cached model for ``model_name`` together with its backend tag.

    Models are loaded on first use and kept in the module-level caches
    ``flair_pipes`` / ``hf_pipes`` so later calls are cheap.

    Args:
        model_name: A key of ``MODEL_REGISTRY``. Names that start with
            "flair" (case-insensitive) are loaded with Flair's
            ``SequenceTagger``; all others with the transformers "ner"
            pipeline using ``aggregation_strategy="simple"``.

    Returns:
        A ``(model, backend)`` tuple where ``backend`` is ``"flair"`` or
        ``"hf"``.

    Raises:
        ValueError: If ``model_name`` is not a string (e.g. ``None`` because
            nothing was selected) or is not a key of ``MODEL_REGISTRY``.
    """
    # Reject a missing selection up front: calling .lower() on None would
    # otherwise surface as an opaque AttributeError.
    if not isinstance(model_name, str):
        raise ValueError(
            "No model selected: model_name must be one of the MODEL_REGISTRY keys."
        )
    if model_name not in MODEL_REGISTRY:
        raise ValueError(
            f"Unknown model {model_name!r}; expected one of: {sorted(MODEL_REGISTRY)}"
        )

    model_id = MODEL_REGISTRY[model_name]

    if model_name.lower().startswith("flair"):
        if model_name not in flair_pipes:
            # Force default tensor type
            torch.set_default_dtype(torch.float32)
            flair_tagger = SequenceTagger.load(model_id)
            flair_tagger.to(torch.device("cpu"))
            flair_tagger.float()
            # Cache only once the tagger is fully configured, so a failure
            # above never leaves a half-initialised model in the cache.
            flair_pipes[model_name] = flair_tagger
        return flair_pipes[model_name], "flair"

    if model_name not in hf_pipes:
        hf_pipes[model_name] = pipeline(
            "ner", model=model_id, aggregation_strategy="simple"
        )
    return hf_pipes[model_name], "hf"
def tagger(text, model_name):
    """Run NER on ``text`` with the selected model.

    Args:
        text: Raw input text from the textbox.
        model_name: Display name of the model (a ``MODEL_REGISTRY`` key).

    Returns:
        A dict ``{"text": ..., "entities": [...]}`` in the shape consumed by
        ``gr.HighlightedText``. Flair spans are converted to dicts with
        ``start``/``end``/``score``/``entity`` keys; the transformers
        pipeline output is passed through unchanged.

    Raises:
        gr.Error: If no model has been selected.
        ValueError: If the loader reports an unsupported backend.
    """
    # Nothing to tag: skip model loading and inference entirely.
    if not text or not text.strip():
        return {"text": text or "", "entities": []}

    # Shown to the user as a readable message instead of a raw traceback.
    if not model_name:
        raise gr.Error("Please select a model from the dropdown before submitting.")

    model, model_type = load_model(model_name)

    if model_type == "flair":
        sentence = Sentence(text)
        model.predict(sentence)
        entities = [
            {
                "start": span.start_position,
                "end": span.end_position,
                "score": span.score,
                "entity": span.tag,
            }
            for span in sentence.get_spans("ner")
        ]
    elif model_type == "hf":
        entities = model(text)
    else:
        # Previously this fell through and failed on an unbound ``entities``.
        raise ValueError(f"Unsupported model backend: {model_type!r}")

    return {"text": text, "entities": entities}
def gradio_ui():
    """Build and return the Gradio Blocks interface for the NER demo.

    Layout: one row with two equally scaled columns. The first column holds
    the intro markdown, the input textbox, the example texts and the submit
    button; the second holds the model dropdown and the highlighted output.
    Clicking the button calls ``tagger(input_text, model_choice)`` and shows
    the result in the "Tagged Entities" box.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    model_names = list(MODEL_REGISTRY)

    with gr.Blocks(theme=GR_THEME) as demo:
        with gr.Row():
            with gr.Column(scale=7):
                gr.Markdown(GR_MARKDOWN_TXT, elem_classes="full-width")
                input_text = gr.Textbox(
                    label="Enter your text here",
                    type='text',
                    placeholder="Biomedical Input text",
                    lines=8,
                )
                gr.Examples(
                    examples=[GR_TXT1, GR_TXT2],
                    inputs=[input_text],
                    label="Example Biomedical texts to try",
                )
                run_btn = gr.Button("Submit Text", variant="primary")
            with gr.Column(scale=7):
                model_choice = gr.Dropdown(
                    choices=model_names,
                    # Pre-select the first model so that pressing Submit
                    # without touching the dropdown never sends None.
                    value=model_names[0] if model_names else None,
                    label="Select a model for Inference",
                )
                output_highlight = gr.HighlightedText(label="Tagged Entities")

        # Event wiring must happen inside the Blocks context.
        run_btn.click(
            fn=tagger,
            inputs=[input_text, model_choice],
            outputs=[output_highlight],
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the interface and start serving it.
    gradio_ui().launch()