Extract Community Condition Entities from Social Determinants of Health Texts

Description

This model extracts community condition information related to Social Determinants of Health from various kinds of biomedical documents.

Predicted Entities

Transportation, Community_Living_Conditions, Housing, Food_Insecurity

Live Demo | Open in Colab | Copy S3 URI

How to use

# Python (PySpark) example.
# Builds a six-stage pipeline: text -> document -> sentence -> token -> embeddings -> ner -> ner_chunk.
# NOTE(review): assumes an active `spark` session and that the Spark NLP / Healthcare NLP
# classes and `StringType` are already imported -- confirm against the surrounding notebook.

# Reads the raw "text" column and writes the "document" column.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Pretrained sentence splitter: "document" -> "sentence".
sentence_detector = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Tokenizes each sentence: "sentence" -> "token".
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Pretrained clinical word embeddings consumed by the NER model below.
clinical_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# The model documented on this page; writes its tags to the "ner" column.
ner_model = (
    MedicalNerModel.pretrained("ner_sdoh_community_condition_wip", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# Converts the "ner" tags into the "ner_chunk" column.
ner_converter = (
    NerConverterInternal()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
)

# Stage order matters: every stage reads columns written by the stages before it.
pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        clinical_embeddings,
        ner_model,
        ner_converter,
    ]
)

# Sample inputs; kept verbatim because the offsets in the Results table refer to these exact strings.
sample_texts = [
    "He is currently experiencing financial stress due to job insecurity, and he lives in a small apartment in a densely populated area with limited access to green spaces and outdoor recreational activities.",
    "Patient reports difficulty affording healthy food, and relies oncheaper, processed options.",
    "She reports her husband and sons provide transportation top medical apptsand do her grocery shopping.",
]

# One row per sample text, in a single string column named "text".
data = spark.createDataFrame(sample_texts, StringType()).toDF("text")

# Fit the pipeline on the data, then run it over the same data.
result = pipeline.fit(data).transform(data)
// Scala example.
// Builds the same six-stage pipeline as the Python example:
// text -> document -> sentence -> token -> embeddings -> ner -> ner_chunk.
// NOTE(review): assumes an active `spark` session with `import spark.implicits._` in scope
// (required by `.toDS` below) and the Spark NLP / Healthcare NLP classes imported -- confirm.

// Reads the raw "text" column and writes the "document" column.
val document_assembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

// Pretrained sentence splitter: "document" -> "sentence".
val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols("document")
    .setOutputCol("sentence")

// Tokenizes each sentence: "sentence" -> "token".
val tokenizer = new Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")

// Pretrained clinical word embeddings consumed by the NER model below.
val clinical_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(Array("sentence", "token"))
    .setOutputCol("embeddings")

// The model documented on this page; writes its tags to the "ner" column.
val ner_model = MedicalNerModel.pretrained("ner_sdoh_community_condition_wip", "en", "clinical/models")
    .setInputCols(Array("sentence", "token", "embeddings"))
    .setOutputCol("ner")

// Converts the "ner" tags into the "ner_chunk" column.
val ner_converter = new NerConverterInternal()
    .setInputCols(Array("sentence", "token", "ner"))
    .setOutputCol("ner_chunk")

// Stage order matters: every stage reads columns written by the stages before it.
val pipeline = new Pipeline().setStages(Array(
    document_assembler,
    sentence_detector,
    tokenizer,
    clinical_embeddings,
    ner_model,
    ner_converter
))

// Same three sample texts as the Python example, so this snippet yields every row of the
// Results table (the original Scala snippet only supplied the first sentence).
// Strings are kept verbatim because the reported offsets refer to these exact inputs.
val data = Seq(
    "He is currently experiencing financial stress due to job insecurity, and he lives in a small apartment in a densely populated area with limited access to green spaces and outdoor recreational activities.",
    "Patient reports difficulty affording healthy food, and relies oncheaper, processed options.",
    "She reports her husband and sons provide transportation top medical apptsand do her grocery shopping."
).toDS.toDF("text")

// Fit the pipeline on the data, then run it over the same data.
val result = pipeline.fit(data).transform(data)

Results

+-------------------------------+-----+---+---------------------------+
|chunk                          |begin|end|ner_label                  |
+-------------------------------+-----+---+---------------------------+
|small apartment                |87   |101|Housing                    |
|green spaces                   |154  |165|Community_Living_Conditions|
|outdoor recreational activities|171  |201|Community_Living_Conditions|
|healthy food                   |37   |48 |Food_Insecurity            |
|transportation                 |41   |54 |Transportation             |
+-------------------------------+-----+---+---------------------------+

Model Information

Model Name: ner_sdoh_community_condition_wip
Compatibility: Healthcare NLP 4.3.1+
License: Licensed
Edition: Official
Input Labels: [sentence, token, embeddings]
Output Labels: [ner]
Language: en
Size: 3.0 MB

Benchmarking

                      label 	 tp	  fp	  fn	total	precision	  recall	      f1
            Food_Insecurity	 40.0	 0.0	 5.0	 45.0	 1.000000	0.888889	0.941176
                    Housing	376.0	20.0	28.0	404.0	 0.949495	0.930693	0.940000
Community_Living_Conditions	 97.0	 8.0	 8.0	105.0	 0.923810	0.923810	0.923810
             Transportation	 31.0	 2.0	 0.0	 31.0	 0.939394	1.000000	0.968750