merge

package merge

Ordering

Alphabetic

Visibility

Public
All

Type Members

class ChunkMergeApproach extends AnnotatorApproach[ChunkMergeModel] with CheckLicense with HasMultipleInputAnnotationCols with MergeResourceParams with MergeCommonParams with MergePrioritizationParams with HasFeatures with FilteringParams with HandleExceptionParams with ResetSentenceIndicesParam

Merges two chunk columns coming from two annotators (NER, ContextualParser or any other annotator producing chunks).

Merges two chunk columns coming from two annotators (NER, ContextualParser or any other annotator producing chunks). The two chunk columns are merged by selecting one chunk from one of the columns according to certain criteria. The decision on which chunk to select is made according to the chunk indices in the source document (chunks with longer lengths and highest information will be kept from each source). Labels can be changed by setReplaceDictResource.

Example

Define a pipeline with 2 different NER models with a ChunkMergeApproach at the end

val data = Seq(("A 63-year-old man presents to the hospital ...")).toDF("text")
val pipeline = new Pipeline().setStages(Array(
  new DocumentAssembler().setInputCol("text").setOutputCol("document"),
  new SentenceDetector().setInputCols("document").setOutputCol("sentence"),
  new Tokenizer().setInputCols("sentence").setOutputCol("token"),
  WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models").setOutputCol("embs"),
  MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")
    .setInputCols("sentence", "token", "embs").setOutputCol("jsl_ner"),
  new NerConverter().setInputCols("sentence", "token", "jsl_ner").setOutputCol("jsl_ner_chunk"),
  MedicalNerModel.pretrained("ner_bionlp", "en", "clinical/models")
    .setInputCols("sentence", "token", "embs").setOutputCol("bionlp_ner"),
  new NerConverter().setInputCols("sentence", "token", "bionlp_ner")
    .setOutputCol("bionlp_ner_chunk"),
  new ChunkMergeApproach().setInputCols("jsl_ner_chunk", "bionlp_ner_chunk").setOutputCol("merged_chunk")
))

Show results

val result = pipeline.fit(data).transform(data).cache()
result.selectExpr("explode(merged_chunk) as a")
  .selectExpr("a.begin","a.end","a.result as chunk","a.metadata.entity as entity")
  .show(5, false)
+-----+---+-----------+---------+
|begin|end|chunk      |entity   |
+-----+---+-----------+---------+
|5    |15 |63-year-old|Age      |
|17   |19 |man        |Gender   |
|64   |72 |recurrent  |Modifier |
|98   |107|cellulitis |Diagnosis|
|110  |119|pneumonias |Diagnosis|
+-----+---+-----------+---------+

class ChunkMergeModel extends AnnotatorModel[ChunkMergeModel] with CheckLicense with HasMultipleInputAnnotationCols with HasSimpleAnnotate[ChunkMergeModel] with MergeCommonParams with MergeFeatureParams with MergePrioritizationParams with HandleExceptionParams with HasSafeAnnotate[ChunkMergeModel] with HasFeatures with FilteringParams with ResetSentenceIndicesParam
Merges entities coming from different CHUNK annotations.

class REChunkMerger extends AnnotatorModel[REChunkMerger] with HasSimpleAnnotate[REChunkMerger]

REChunkMerger annotator merges relation chunks to create a new chunk.

Example

val documentAssembler = new DocumentAssembler()
   .setInputCol("text")
   .setOutputCol("document")

val tokenizer = new Tokenizer()
   .setInputCols(Array("document"))
   .setOutputCol("tokens")

val sentencer = new SentenceDetector()
   .setInputCols(Array("document"))
   .setOutputCol("sentences")

val embedder = WordEmbeddingsModel
   .pretrained("embeddings_clinical", "en", "clinical/models")
   .setInputCols(Array("document", "tokens"))
   .setOutputCol("embeddings")

val posTagger = PerceptronModel
   .pretrained("pos_clinical", "en", "clinical/models")
   .setInputCols(Array("sentences", "tokens"))
   .setOutputCol("posTags")

val nerTagger = MedicalNerModel
   .pretrained("ner_clinical", "en", "clinical/models")
   .setInputCols(Array("sentences", "tokens", "embeddings"))
   .setOutputCol("nerTags")

val nerConverter = new NerConverterInternal()
   .setInputCols(Array("sentences", "tokens", "nerTags"))
   .setOutputCol("nerChunks")

val depencyParser = DependencyParserModel
   .pretrained("dependency_conllu", "en")
   .setInputCols(Array("document", "posTags", "tokens"))
   .setOutputCol("dependencies")

val re = RelationExtractionModel
   .pretrained("re_clinical", "en", "clinical/models")
   .setInputCols(Array("embeddings", "posTags", "nerChunks", "dependencies"))
   .setOutputCol("relations_t")

val REChunkMerger = new REChunkMerger()
   .setInputCols("relations_t")
   .setOutputCol("relation_chunk")
   .setSeparator(" & ")

val flattener = new Flattener()
   .setInputCols("relation_chunk")
   .setExplodeSelectedFields(Map("relation_chunk" -> Array(
                                 "result as result",
                                 "begin as begin",
                                 "end as end",
                                 "metadata.relation_type as relationType",
                                 "metadata.sentence as sentence"
  )))

val pipeline = new Pipeline()
   .setStages(Array(
       documentAssembler,
       sentencer,
       tokenizer,
       embedder,
       posTagger,
       nerTagger,
       nerConverter,
       depencyParser,
       re,
       REChunkMerger,
       flattener
   ))

import spark.implicits._

val data = Seq("A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to " +
 "presentation and subsequent type two diabetes mellitus ( T2DM ). one prior episode of HTG-induced pancreatitis " +
 "three years prior to presentation , associated with an acute hepatitis , and obesity with a body mass index ")
   .toDS()
   .toDF("text")

val result = pipeline.fit(data).transform(data)

Show results

+---------------------------------------------------------------------+-----+---+------------+--------+
|result                                                               |begin|end|relationType|sentence|
+---------------------------------------------------------------------+-----+---+------------+--------+
|gestational diabetes mellitus & subsequent type two diabetes mellitus|39   |153|TeRP        |0       |
|gestational diabetes mellitus & T2DM                                 |39   |160|TeRP        |0       |
|subsequent type two diabetes mellitus & T2DM                         |117  |160|TeRP        |0       |
+---------------------------------------------------------------------+-----+---+------------+--------+

trait ReadablePretrainedChunkMerge extends ParamsAndFeaturesReadable[ChunkMergeModel] with HasPretrained[ChunkMergeModel]

Value Members

object ChunkMergeModel extends ReadablePretrainedChunkMerge with Serializable
object REChunkMerger extends DefaultParamsReadable[REChunkMerger] with Serializable
This is the companion object of REChunkMerger.
This is the companion object of REChunkMerger. Please refer to that class for the documentation.

Packages

merge

package merge

Type Members

Example

Example

Value Members

Ungrouped

Packages

merge 

package merge

Type Members

Example

Example

Value Members

Ungrouped

merge