Packages

package re

Ordering
  1. Alphabetic
Visibility
  1. Public
  2. All

Type Members

  1. case class BertREConfig(sentenceStartTokenId: Int = 102, sentenceEndTokenId: Int = 103, entity1StartTokenId: Int = 10, entity1EndTokenId: Int = 11, entity2StartTokenId: Int = 12, entity2EndTokenId: Int = 13, entity1StartTag: String = "e1b", entity1EndTag: String = "e1e", entity2StartTag: String = "e2b", entity2EndTag: String = "e2e") extends Product with Serializable
  2. case class DLRelationInstance(relationType: String, entity1: String, entity2: String, entity1_begin: Int, entity1_end: Int, entity2_begin: Int, entity2_end: Int, chunk1: String, chunk2: String, syntactic_distance: String, context: Sentence) extends Product with Serializable
  3. class PosologyREModel extends RelationExtractionModel with HasStorageRef with ParamsAndFeaturesWritable with CheckLicense

    Instantiated RelationExtractionModel for extracting relationships between different recognized drug entities.

    Instantiated RelationExtractionModel for extracting relationships between different recognized drug entities. This class is not intended to be used directly; please use the RelationExtractionModel instead. Possible relation types are "DRUG-DOSAGE", "DRUG-ADE", "DRUG-FORM", "DRUG-FREQUENCY", "DRUG-ROUTE", "DRUG-REASON", "DRUG-STRENGTH", "DRUG-DURATION". Please see the Models Hub for available models.

    See also

    RelationExtractionModel to use the model

  4. class REDataEncoder extends Serializable
  5. class RENerChunksFilter extends AnnotatorModel[RENerChunksFilter] with ParamsAndFeaturesReadable[RENerChunksFilter] with HasSimpleAnnotate[RENerChunksFilter] with CheckLicense

    Filters and outputs combinations of relations between extracted entities, for further processing.

    Filters and outputs combinations of relations between extracted entities, for further processing. This annotator is especially useful to create inputs for the RelationExtractionDLModel.

    Example

    Define pipeline stages to extract entities

    val documenter = new DocumentAssembler()
      .setInputCol("text")
      .setOutputCol("document")
    
    val sentencer = new SentenceDetector()
      .setInputCols("document")
      .setOutputCol("sentences")
    
    val tokenizer = new Tokenizer()
      .setInputCols("sentences")
      .setOutputCol("tokens")
    
    val words_embedder = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
      .setInputCols("sentences", "tokens")
      .setOutputCol("embeddings")
    
    val pos_tagger = PerceptronModel.pretrained("pos_clinical", "en", "clinical/models")
      .setInputCols("sentences", "tokens")
      .setOutputCol("pos_tags")
    
    val dependency_parser = DependencyParserModel.pretrained("dependency_conllu", "en")
      .setInputCols("sentences", "pos_tags", "tokens")
      .setOutputCol("dependencies")
    
    val clinical_ner_tagger = MedicalNerModel.pretrained("jsl_ner_wip_greedy_clinical","en","clinical/models")
      .setInputCols("sentences", "tokens", "embeddings")
      .setOutputCol("ner_tags")
    
    val ner_chunker = new NerConverter()
      .setInputCols("sentences", "tokens", "ner_tags")
      .setOutputCol("ner_chunks")

    Define the relation pairs and the filter

    val relationPairs = Array("direction-external_body_part_or_region",
                          "external_body_part_or_region-direction",
                          "direction-internal_organ_or_component",
                          "internal_organ_or_component-direction")
    
    val re_ner_chunk_filter = new RENerChunksFilter()
        .setInputCols("ner_chunks", "dependencies")
        .setOutputCol("re_ner_chunks")
        .setMaxSyntacticDistance(4)
        .setRelationPairs(Array("internal_organ_or_component-direction"))
    
    val trained_pipeline = new Pipeline().setStages(Array(
      documenter,
      sentencer,
      tokenizer,
      words_embedder,
      pos_tagger,
      clinical_ner_tagger,
      ner_chunker,
      dependency_parser,
      re_ner_chunk_filter
    ))
    
    val data = Seq("MRI demonstrated infarction in the upper brain stem , left cerebellum and  right basil ganglia").toDF("text")
    val result = trained_pipeline.fit(data).transform(data)

    Show results

    result.selectExpr("explode(re_ner_chunks) as re_chunks")
      .selectExpr("re_chunks.begin", "re_chunks.result", "re_chunks.metadata.entity", "re_chunks.metadata.paired_to")
      .show(6, truncate=false)
    +-----+-------------+---------------------------+---------+
    |begin|result       |entity                     |paired_to|
    +-----+-------------+---------------------------+---------+
    |35   |upper        |Direction                  |41       |
    |41   |brain stem   |Internal_organ_or_component|35       |
    |35   |upper        |Direction                  |59       |
    |59   |cerebellum   |Internal_organ_or_component|35       |
    |35   |upper        |Direction                  |81       |
    |81   |basil ganglia|Internal_organ_or_component|35       |
    +-----+-------------+---------------------------+---------+
    See also

    RelationExtractionDLModel for BERT based extraction

  6. trait ReadRelationExtractionDLModelTensorflowModel extends ReadTensorflowModel
  7. trait ReadablePretrainedRelationExtractionDLModel extends ParamsAndFeaturesReadable[RelationExtractionDLModel] with HasPretrained[RelationExtractionDLModel]
  8. class RelationExtractionApproach extends GenericClassifierApproach

    Trains a TensorFlow model for relation extraction.

    Trains a TensorFlow model for relation extraction. The TensorFlow graph in .pb format needs to be specified with setModelFile. The result is a RelationExtractionModel. To start training, see the parameters that need to be set in the Parameters section.

    Example

    Defining pipeline stages to extract entities first

    val documentAssembler = new DocumentAssembler()
      .setInputCol("text")
      .setOutputCol("document")
    
    val tokenizer = new Tokenizer()
      .setInputCols(Array("document"))
      .setOutputCol("tokens")
    
    val embedder = WordEmbeddingsModel
      .pretrained("embeddings_clinical", "en", "clinical/models")
      .setInputCols(Array("document", "tokens"))
      .setOutputCol("embeddings")
    
    val posTagger = PerceptronModel
      .pretrained("pos_clinical", "en", "clinical/models")
      .setInputCols(Array("document", "tokens"))
      .setOutputCol("posTags")
    
    val nerTagger = MedicalNerModel
      .pretrained("ner_events_clinical", "en", "clinical/models")
      .setInputCols(Array("document", "tokens", "embeddings"))
      .setOutputCol("ner_tags")
    
    val nerConverter = new NerConverter()
      .setInputCols(Array("document", "tokens", "ner_tags"))
      .setOutputCol("nerChunks")
    
    val depencyParser = DependencyParserModel
      .pretrained("dependency_conllu", "en")
      .setInputCols(Array("document", "posTags", "tokens"))
      .setOutputCol("dependencies")

    Then define RelationExtractionApproach and training parameters

    val re = new RelationExtractionApproach()
      .setInputCols(Array("embeddings", "posTags", "train_ner_chunks", "dependencies"))
      .setOutputCol("relations_t")
      .setLabelColumn("target_rel")
      .setEpochsNumber(300)
      .setBatchSize(200)
      .setlearningRate(0.001f)
      .setModelFile("path/to/graph_file.pb")
      .setFixImbalance(true)
      .setValidationSplit(0.05f)
      .setFromEntity("from_begin", "from_end", "from_label")
      .setToEntity("to_begin", "to_end", "to_label")
    
    val finisher = new Finisher()
      .setInputCols(Array("relations_t"))
      .setOutputCols(Array("relations"))
      .setCleanAnnotations(false)
      .setValueSplitSymbol(",")
      .setAnnotationSplitSymbol(",")
      .setOutputAsArray(false)

    Define complete pipeline and start training

    val pipeline = new Pipeline()
      .setStages(Array(
        documentAssembler,
        tokenizer,
        embedder,
        posTagger,
        nerTagger,
        nerConverter,
        depencyParser,
        re,
        finisher))
    
    val model = pipeline.fit(trainData)
    See also

    RelationExtractionModel for pretrained models and how to use it

  9. class RelationExtractionDLModel extends AnnotatorModel[RelationExtractionDLModel] with WriteTensorflowModel with HasStorageRef with HasCaseSensitiveProperties with HasSimpleAnnotate[RelationExtractionDLModel] with CheckLicense

    Extracts and classifies instances of relations between named entities.

    Extracts and classifies instances of relations between named entities. In contrast with RelationExtractionModel, RelationExtractionDLModel is based on BERT. Please see the Models Hub for available pretrained models.

    Example

    Relation Extraction between body parts

    This is a continuation of the RENerChunksFilter example. See that class for how to extract the relation chunks. Define the extraction model:

    val re_ner_chunk_filter = new RENerChunksFilter()
     .setInputCols("ner_chunks", "dependencies")
     .setOutputCol("re_ner_chunks")
     .setMaxSyntacticDistance(4)
     .setRelationPairs(Array("internal_organ_or_component-direction"))
    
    val re_model = RelationExtractionDLModel.pretrained("redl_bodypart_direction_biobert", "en", "clinical/models")
      .setPredictionThreshold(0.5f)
      .setInputCols("re_ner_chunks", "sentences")
      .setOutputCol("relations")
    
    val trained_pipeline = new Pipeline().setStages(Array(
      documenter,
      sentencer,
      tokenizer,
      words_embedder,
      pos_tagger,
      clinical_ner_tagger,
      ner_chunker,
      dependency_parser,
      re_ner_chunk_filter,
      re_model
    ))
    
    val data = Seq("MRI demonstrated infarction in the upper brain stem , left cerebellum and  right basil ganglia").toDF("text")
    val result = trained_pipeline.fit(data).transform(data)

    Show results

    result.selectExpr("explode(relations) as relations")
     .select(
       "relations.metadata.chunk1",
       "relations.metadata.entity1",
       "relations.metadata.chunk2",
       "relations.metadata.entity2",
       "relations.result"
     )
     .where("result != 0")
     .show(truncate=false)
    +------+---------+-------------+---------------------------+------+
    |chunk1|entity1  |chunk2       |entity2                    |result|
    +------+---------+-------------+---------------------------+------+
    |upper |Direction|brain stem   |Internal_organ_or_component|1     |
    |left  |Direction|cerebellum   |Internal_organ_or_component|1     |
    |right |Direction|basil ganglia|Internal_organ_or_component|1     |
    +------+---------+-------------+---------------------------+------+
    See also

    RelationExtractionModel for ML based extraction

    RENerChunksFilter for how to create inputs

  10. class RelationExtractionModel extends GenericClassifierModel

    Extracts and classifies instances of relations between named entities.

    Extracts and classifies instances of relations between named entities. For this, relation pairs need to be defined with setRelationPairs, to specify between which entities the extraction should be done.

    Please see the Models Hub for available pretrained models.

    Example

    Relation Extraction between body parts

    Define pipeline stages to extract entities

    val documenter = new DocumentAssembler()
      .setInputCol("text")
      .setOutputCol("document")
    
    val sentencer = new SentenceDetector()
      .setInputCols("document")
      .setOutputCol("sentences")
    
    val tokenizer = new Tokenizer()
      .setInputCols("sentences")
      .setOutputCol("tokens")
    
    val words_embedder = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
      .setInputCols("sentences", "tokens")
      .setOutputCol("embeddings")
    
    val pos_tagger = PerceptronModel.pretrained("pos_clinical", "en", "clinical/models")
      .setInputCols("sentences", "tokens")
      .setOutputCol("pos_tags")
    
    val dependency_parser = DependencyParserModel.pretrained("dependency_conllu", "en")
      .setInputCols("sentences", "pos_tags", "tokens")
      .setOutputCol("dependencies")
    
    val clinical_ner_tagger = MedicalNerModel.pretrained("jsl_ner_wip_greedy_clinical","en","clinical/models")
      .setInputCols("sentences", "tokens", "embeddings")
      .setOutputCol("ner_tags")
    
    val ner_chunker = new NerConverter()
      .setInputCols("sentences", "tokens", "ner_tags")
      .setOutputCol("ner_chunks")

    Define the relations that are to be extracted

    val relationPairs = Array("direction-external_body_part_or_region",
                          "external_body_part_or_region-direction",
                          "direction-internal_organ_or_component",
                          "internal_organ_or_component-direction")
    
    val re_model = RelationExtractionModel.pretrained("re_bodypart_directions", "en", "clinical/models")
      .setInputCols("embeddings", "pos_tags", "ner_chunks", "dependencies")
      .setOutputCol("relations")
      .setRelationPairs(relationPairs)
      .setMaxSyntacticDistance(4)
      .setPredictionThreshold(0.9f)
    
    val pipeline = new Pipeline().setStages(Array(
      documenter,
      sentencer,
      tokenizer,
      words_embedder,
      pos_tagger,
      clinical_ner_tagger,
      ner_chunker,
      dependency_parser,
      re_model
    ))
    
    val data = Seq("MRI demonstrated infarction in the upper brain stem , left cerebellum and  right basil ganglia").toDF("text")
    val result = pipeline.fit(data).transform(data)

    Show results

    result.selectExpr("explode(relations) as relations")
     .select(
       "relations.metadata.chunk1",
       "relations.metadata.entity1",
       "relations.metadata.chunk2",
       "relations.metadata.entity2",
       "relations.result"
     )
     .where("result != 0")
     .show(truncate=false)
    +------+---------+-------------+---------------------------+------+
    |chunk1|entity1  |chunk2       |entity2                    |result|
    +------+---------+-------------+---------------------------+------+
    |upper |Direction|brain stem   |Internal_organ_or_component|1     |
    |left  |Direction|cerebellum   |Internal_organ_or_component|1     |
    |right |Direction|basil ganglia|Internal_organ_or_component|1     |
    +------+---------+-------------+---------------------------+------+
    See also

    RelationExtractionApproach to train your own model.

    RelationExtractionDLModel for BERT based extraction

  11. case class RelationInstance(relationType: String, entity1: String, entity2: String, entity1_begin: Int, entity1_end: Int, entity2_begin: Int, entity2_end: Int, chunk1: String, chunk2: String, vector: Array[Float], description: String) extends Product with Serializable

Ungrouped