Description
Identify whether a test was conducted or a diagnosis was made on a specific date by checking the relations between clinical entities and dates. `1`: the date and the clinical entity are related. `0`: the date and the clinical entity are not related.
Predicted Entities
`1`, `0`
Live Demo Open in Colab Copy S3 URI
How to use
# Example pipeline: detect clinical event entities, then score each candidate
# entity pair with the `redl_date_clinical_biobert` relation model.
# Column names are chained stage to stage, so every `setInputCols` value must
# match an upstream `setOutputCol` exactly.

# "text" -> "document"
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# "document" -> "sentences"
sentencer = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentences")
)

# "sentences" -> "tokens"
tokenizer = (
    Tokenizer()
    .setInputCols(["sentences"])
    .setOutputCol("tokens")
)

# Pretrained models are loaded through the class-level `pretrained` loader
# rather than by building an empty instance first.
words_embedder = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentences", "tokens"])
    .setOutputCol("embeddings")
)

pos_tagger = (
    PerceptronModel.pretrained("pos_clinical", "en", "clinical/models")
    .setInputCols(["sentences", "tokens"])
    .setOutputCol("pos_tags")
)

events_ner_tagger = (
    MedicalNerModel.pretrained("ner_events_clinical", "en", "clinical/models")
    .setInputCols(["sentences", "tokens", "embeddings"])
    .setOutputCol("ner_tags")
)

# "ner_tags" -> "ner_chunks"
ner_chunker = (
    NerConverterInternal()
    .setInputCols(["sentences", "tokens", "ner_tags"])
    .setOutputCol("ner_chunks")
)

dependency_parser = (
    DependencyParserModel.pretrained("dependency_conllu", "en")
    .setInputCols(["sentences", "pos_tags", "tokens"])
    .setOutputCol("dependencies")
)

# Combines NER chunks with the dependency output into "re_ner_chunks",
# the input the relation model consumes.
events_re_ner_chunk_filter = (
    RENerChunksFilter()
    .setInputCols(["ner_chunks", "dependencies"])
    .setOutputCol("re_ner_chunks")
)

# Relation model; the prediction threshold is set to 0.5.
events_re_Model = (
    RelationExtractionDLModel.pretrained("redl_date_clinical_biobert", "en", "clinical/models")
    .setPredictionThreshold(0.5)
    .setInputCols(["re_ner_chunks", "sentences"])
    .setOutputCol("relations")
)

# Stage order matters: each stage reads columns produced by earlier ones.
pipeline = Pipeline(stages=[
    documenter,
    sentencer,
    tokenizer,
    words_embedder,
    pos_tagger,
    events_ner_tagger,
    ner_chunker,
    dependency_parser,
    events_re_ner_chunk_filter,
    events_re_Model,
])

# Single-row DataFrame with one "text" column (assumes an active `spark` session).
data = spark.createDataFrame([['''This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94.''']]).toDF("text")

result = pipeline.fit(data).transform(data)
// Example pipeline: detect clinical event entities, then score each candidate
// entity pair with the `redl_date_clinical_biobert` relation model.
// Column names are chained stage to stage, so every `setInputCols` value must
// match an upstream `setOutputCol` exactly.

// "text" -> "document"
val documenter = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// "document" -> "sentences"
val sentencer = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentences")

// "sentences" -> "tokens"
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentences"))
  .setOutputCol("tokens")

// Input columns must be "sentences"/"tokens" (plural), matching the outputs
// of the two stages above.
val words_embedder = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("embeddings")

// Pretrained models are loaded through the companion object's `pretrained`
// (no `()` / `new`), consistent with the other pretrained stages here.
val pos_tagger = PerceptronModel
  .pretrained("pos_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("pos_tags")

val events_ner_tagger = MedicalNerModel
  .pretrained("ner_events_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "embeddings"))
  .setOutputCol("ner_tags")

// "ner_tags" -> "ner_chunks"
val ner_chunker = new NerConverterInternal()
  .setInputCols(Array("sentences", "tokens", "ner_tags"))
  .setOutputCol("ner_chunks")

val dependency_parser = DependencyParserModel
  .pretrained("dependency_conllu", "en")
  .setInputCols(Array("sentences", "pos_tags", "tokens"))
  .setOutputCol("dependencies")

// Combines NER chunks with the dependency output into "re_ner_chunks",
// the input the relation model consumes.
val events_re_ner_chunk_filter = new RENerChunksFilter()
  .setInputCols(Array("ner_chunks", "dependencies"))
  .setOutputCol("re_ner_chunks")

// Relation model; the prediction threshold is set to 0.5.
// NOTE(review): if `setPredictionThreshold` is declared with a Float
// parameter, this literal needs an `f` suffix (0.5f) — confirm against the API.
val events_re_Model = RelationExtractionDLModel
  .pretrained("redl_date_clinical_biobert", "en", "clinical/models")
  .setPredictionThreshold(0.5)
  .setInputCols(Array("re_ner_chunks", "sentences"))
  .setOutputCol("relations")

// Stage order matters: each stage reads columns produced by earlier ones.
val pipeline = new Pipeline().setStages(Array(
  documenter,
  sentencer,
  tokenizer,
  words_embedder,
  pos_tagger,
  events_ner_tagger,
  ner_chunker,
  dependency_parser,
  events_re_ner_chunk_filter,
  events_re_Model
))

// `.toDS` on a Seq needs the Spark session implicits in scope
// (e.g. `import spark.implicits._`) — presumably provided by the host notebook/shell.
val data = Seq("This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94.").toDS.toDF("text")

val result = pipeline.fit(data).transform(data)
import nlu
# Load the NLU pipeline registered under the "en.relation.date" reference,
# then run it on the sample sentence.
date_relation_pipeline = nlu.load("en.relation.date")
date_relation_pipeline.predict("""This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94.""")
Results
+--------+-------+-------------+-----------+--------------------+-------+-------------+-----------+--------------------+----------+
|relation|entity1|entity1_begin|entity1_end| chunk1|entity2|entity2_begin|entity2_end| chunk2|confidence|
+--------+-------+-------------+-----------+--------------------+-------+-------------+-----------+--------------------+----------+
| 1| TEST| 24| 25| CT| DATE| 30| 36| 1/12/95|0.99997973|
| 1| TEST| 24| 25| CT|PROBLEM| 44| 83|progressive memor...| 0.9998983|
| 1| TEST| 24| 25| CT| DATE| 91| 97| 8/11/94| 0.9997316|
| 1| DATE| 30| 36| 1/12/95|PROBLEM| 44| 83|progressive memor...| 0.9998915|
| 1| DATE| 30| 36| 1/12/95| DATE| 91| 97| 8/11/94| 0.9997931|
| 1|PROBLEM| 44| 83|progressive memor...| DATE| 91| 97| 8/11/94| 0.9998667|
+--------+-------+-------------+-----------+--------------------+-------+-------------+-----------+--------------------+----------+
Model Information
Model Name: | redl_date_clinical_biobert |
Compatibility: | Healthcare NLP 4.2.4+ |
License: | Licensed |
Edition: | Official |
Language: | en |
Size: | 401.7 MB |
References
Trained on an internal dataset.
Benchmarking
label Recall Precision F1 Support
0 0.738 0.729 0.734 84
1 0.945 0.947 0.946 416
Avg. 0.841 0.838 0.840 -