Notebook Fifteen | Repository
Visualise Attention in Transformers

Andrea Leone
University of Trento
January 2022

This notebook demonstrates three bertviz visualisations of Transformer attention: the head view on a BERT sentence pair, the model view on a Marian encoder-decoder translation model, and the neuron view of the query-key interactions inside BERT.
import transformers
import bertviz

# Load BERT with attention outputs enabled, together with its word-piece tokenizer.
model_version = 'bert-base-uncased'
transformers.utils.logging.set_verbosity_error()
model = transformers.BertModel.from_pretrained(model_version, output_attentions=True)
tokenizer = transformers.BertTokenizer.from_pretrained(model_version)

# Encode a sentence pair; token_type_ids mark which segment each token belongs to.
sentence_a = "The cat sat on the mat"
sentence_b = "The cat lay on the rug"
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt')
input_ids = inputs['input_ids']
token_type_ids = inputs['token_type_ids']

# With output_attentions=True, the last element of the model output
# is the tuple of per-layer attention tensors.
attention = model(input_ids, token_type_ids=token_type_ids)[-1]

# Recover the tokens and the position where sentence B starts,
# so bertviz can colour the two segments separately.
sentence_b_start = token_type_ids[0].tolist().index(1)
input_id_list = input_ids[0].tolist()
tokens = tokenizer.convert_ids_to_tokens(input_id_list)

bertviz.head_view(attention, tokens, sentence_b_start)
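A quick sanity check of what head_view consumes can save some debugging: attention is a tuple holding one tensor per layer, each shaped (batch, num_heads, seq_len, seq_len). A minimal sketch, using only the objects defined above:

# Inspect the attention tensors passed to head_view.
print(len(attention))        # 12, one tensor per layer in bert-base-uncased
print(attention[0].shape)    # torch.Size([1, 12, seq_len, seq_len]): batch, heads, query, key
print(tokens[:3])            # ['[CLS]', 'the', 'cat']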
# The same idea extends to encoder-decoder models: here, a Marian
# English-to-German translation model.
tokenizer = transformers.AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
model = transformers.AutoModel.from_pretrained("Helsinki-NLP/opus-mt-en-de", output_attentions=True)

string_en = "She sees the small elephant."
string_de = "Sie sieht den kleinen Elefanten."
encoder_input_ids = tokenizer(string_en, return_tensors="pt", add_special_tokens=True).input_ids
decoder_input_ids = tokenizer(string_de, return_tensors="pt", add_special_tokens=True).input_ids

# A forward pass with explicit decoder inputs returns encoder,
# decoder, and cross-attention weights.
outputs = model(
    input_ids = encoder_input_ids,
    decoder_input_ids = decoder_input_ids
)

encoder_text = tokenizer.convert_ids_to_tokens(encoder_input_ids[0])
decoder_text = tokenizer.convert_ids_to_tokens(decoder_input_ids[0])

# model_view renders all three attention streams side by side,
# with one thumbnail per layer and head.
bertviz.model_view(
    encoder_attention = outputs.encoder_attentions,
    decoder_attention = outputs.decoder_attentions,
    cross_attention   = outputs.cross_attentions,
    encoder_tokens    = encoder_text,
    decoder_tokens    = decoder_text,
    display_mode      = "light"
)
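Each attention stream is a tuple with one tensor per layer; the cross-attention tensors map decoder positions to encoder positions, which is where the word alignments show up. A minimal shape check, assuming only the outputs computed above:

# Cross-attention is shaped (batch, num_heads, target_len, source_len).
print(len(outputs.cross_attentions))        # one tensor per decoder layer
print(outputs.cross_attentions[0].shape)    # (1, num_heads, len(decoder tokens), len(encoder tokens))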
# The neuron view requires bertviz's special model and tokenizer classes,
# which expose the query and key vectors in addition to the attention weights.
from bertviz.transformers_neuron_view import BertModel, BertTokenizer
from bertviz.neuron_view import show

model_type = 'bert'
model_version = 'bert-base-uncased'
do_lower_case = True
model = BertModel.from_pretrained(model_version)
tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=do_lower_case)

sentence_1 = "I didn't ask for the life that I was given."
sentence_2 = "But it was given nonetheless, and with it I did my best."

show(model, model_type, tokenizer, sentence_1, sentence_2, display_mode='light')
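The neuron view can also be opened pre-focused on a single attention head. The sketch below assumes the layer and head keyword arguments of show (both 0-indexed), as used in the bertviz examples:

# Open the neuron view directly on layer 2, head 0.
show(model, model_type, tokenizer, sentence_1, sentence_2,
     display_mode='light', layer=2, head=0)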