`sparknlp_jsl.deidentification_module`#

Module Contents#

Classes#

Deidentifier

class Deidentifier(spark, custom_pipeline=None, fields=None, ner_chunk='ner_chunk', sentence='sentence', token='token', document='document', masking_policy='entity_labels', fixed_mask_length=1, obfuscate_date=True, obfuscate_ref_source='faker', obfuscate_ref_file_path=None, age_group_obfuscation=False, age_ranges=None, shift_days=False, number_of_days=None, documenthashcoder_col_name='documentHash', date_tag='DATE', language='en', region='us', unnormalized_date=False, unnormalized_mode='mask', id_column_name='id', date_shift_column_name='dateshift', multi_mode_file_path=None, domain=None, separator='\t', input_file_path=None, output_file_path='deidentified.csv')#

age_group_obfuscation = False#

age_ranges = None#

custom_pipeline = None#

date_shift_column_name = 'dateshift'#

date_tag = 'DATE'#

document = 'document'#

documenthashcoder_col_name = 'documentHash'#

domain = None#

fields = None#

fixed_mask_length = 1#

id_column_name = 'id'#

input_file_path = None#

language = 'en'#

masking_policy = 'entity_labels'#

multi_mode_file_path = None#

ner_chunk = 'ner_chunk'#

number_of_days = None#

obfuscate_date = True#

obfuscate_ref_file_path = None#

obfuscate_ref_source = 'faker'#

output_file_path = 'deidentified.csv'#

region = 'us'#

sentence = 'sentence'#

separator = '\t'#

shift_days = False#

spark#

token = 'token'#

unnormalized_date = False#

unnormalized_mode = 'mask'#

deid_with_custom_pipeline(pretrained_pipeline=None)#: Deidentifies the given data with a custom pipeline.

deid_with_pretrained_pipeline()#: Performs deidentification with a pretrained pipeline.

deidentify()#: Deidentifies the input file according to the given field names and saves the results as a CSV or JSON file.

sparknlp_jsl.deidentification_module#

Module Contents#

Classes#

`sparknlp_jsl.deidentification_module`#