Extract Access to Healthcare Entities from Social Determinants of Health Texts

Description

This model extracts entities describing access to healthcare, one of the Social Determinants of Health (SDOH) categories, from various kinds of biomedical documents.

Predicted Entities

Insurance_Status, Healthcare_Institution, Access_To_Care

Live Demo Open in Colab Copy S3 URI

How to use

# Python example: build the NER pipeline stage by stage.
# Each stage reads the output column(s) produced by the stages before it.

# "text" -> "document"
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# "document" -> "sentence" (pretrained sentence_detector_dl, English)
sentence_detector = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# "sentence" -> "token"
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# "sentence" + "token" -> "embeddings" (pretrained embeddings_clinical)
clinical_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# "sentence" + "token" + "embeddings" -> "ner" (the model this page documents)
ner_model = (
    MedicalNerModel.pretrained("ner_sdoh_access_to_healthcare_wip", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# "sentence" + "token" + "ner" -> "ner_chunk"
ner_converter = (
    NerConverterInternal()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
)

# Stages are listed in dependency order.
pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        clinical_embeddings,
        ner_model,
        ner_converter,
    ]
)

# Sample inputs, one string per row.
sample_texts = [
    "She has a pension and private health insurance, she reports feeling lonely and isolated.",
    # NOTE: kept verbatim, including the missing space in "insecurityduring";
    # the begin/end offsets shown under Results (65-93) match this exact string.
    "He also reported food insecurityduring his childhood and lack of access to adequate healthcare.",
    "She used to work as a unit clerk at XYZ Medical Center.",
]

# Single string column named "text", the column the DocumentAssembler reads.
data = spark.createDataFrame(sample_texts, StringType()).toDF("text")

# Fit the pipeline on the sample frame, then annotate that same frame.
result = pipeline.fit(data).transform(data)
// Scala example: build the NER pipeline stage by stage.
// Each stage reads the output column(s) produced by the stages before it.

// "text" -> "document"
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// "document" -> "sentence" (pretrained sentence_detector_dl, English)
val sentence_detector = SentenceDetectorDLModel
  .pretrained("sentence_detector_dl", "en")
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// "sentence" -> "token"
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")

// "sentence" + "token" -> "embeddings" (pretrained embeddings_clinical)
val clinical_embeddings = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// "sentence" + "token" + "embeddings" -> "ner" (the model this page documents)
val ner_model = MedicalNerModel
  .pretrained("ner_sdoh_access_to_healthcare_wip", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner")

// "sentence" + "token" + "ner" -> "ner_chunk"
val ner_converter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")

// Stages are listed in dependency order.
val pipeline = new Pipeline().setStages(
  Array(
    document_assembler,
    sentence_detector,
    tokenizer,
    clinical_embeddings,
    ner_model,
    ner_converter
  )
)

// `.toDS` on a local Seq needs the session's implicit encoders.
// spark-shell imports these automatically; a standalone program does not.
import spark.implicits._

// Use the same three sample sentences as the Python example so this snippet can
// produce all three rows shown under Results (a single sentence yields at most one).
// The second sentence is kept verbatim, including "insecurityduring", because the
// begin/end offsets in the Results table match that exact string.
val data = Seq(
  "She has a pension and private health insurance, she reports feeling lonely and isolated.",
  "He also reported food insecurityduring his childhood and lack of access to adequate healthcare.",
  "She used to work as a unit clerk at XYZ Medical Center."
).toDS.toDF("text")

// Fit the pipeline on the sample frame, then annotate that same frame.
val result = pipeline.fit(data).transform(data)

Results

+-----------------------------+-----+---+----------------------+
|chunk                        |begin|end|ner_label             |
+-----------------------------+-----+---+----------------------+
|private health insurance     |22   |45 |Insurance_Status      |
|access to adequate healthcare|65   |93 |Access_To_Care        |
|XYZ Medical Center           |36   |53 |Healthcare_Institution|
+-----------------------------+-----+---+----------------------+

Model Information

Model Name: ner_sdoh_access_to_healthcare_wip
Compatibility: Healthcare NLP 4.3.1+
License: Licensed
Edition: Official
Input Labels: [sentence, token, embeddings]
Output Labels: [ner]
Language: en
Size: 3.0 MB

Benchmarking

                 label	   tp	  fp	  fn	total	precision 	recall	      f1
Healthcare_Institution	 94.0	 8.0	 5.0	 99.0	 0.921569	0.949495	0.935323
        Access_To_Care	561.0	23.0	38.0	599.0	 0.960616	0.936561	0.948436
      Insurance_Status	 60.0	 5.0	 3.0	 63.0	 0.923077	0.952381	0.937500