import numpy as np
import torch
from transformers import BertTokenizer, BertModel
def get_context_vectors(sentence, model, tokenizer):
    inputs = tokenizer(sentence, return_tensors="pt", add_special_tokens=True)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Get the tokens (for reference)
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])

    # Forward pass, requesting the hidden states from every layer
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask, output_hidden_states=True)

    hidden_states = outputs.hidden_states

    # Each element in hidden_states has shape (batch_size, sequence_length, hidden_size)
    # Take the first element in the batch from the last layer
    last_layer_vectors = hidden_states[-1][0].numpy()  # Shape: (sequence_length, hidden_size)

    return tokens, last_layer_vectors
def cosine_similarity(vec1, vec2):
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
def extract_contextual_keywords(document, model, tokenizer, top_n=5):
    """Extract contextual keywords from a document."""
    # Split the document into sentences (simple split on periods)
    sentences = [s.strip() for s in document.split(".") if s.strip()]

    # Process each sentence to get context vectors
    all_tokens = []
    all_vectors = []
    for sentence in sentences:
        if not sentence:
            continue  # Skip empty sentences

        # Get context vectors
        tokens, vectors = get_context_vectors(sentence, model, tokenizer)

        # Store tokens and vectors (excluding the special tokens [CLS] and [SEP])
        all_tokens.extend(tokens[1:-1])
        all_vectors.extend(vectors[1:-1])

    # Convert to a numpy array, then compute the document vector as the average of all token vectors
    all_vectors = np.array(all_vectors)
    doc_vector = np.mean(all_vectors, axis=0)

    # Calculate the similarity between each token vector and the document vector
    similarities = []
    for token, vec in zip(all_tokens, all_vectors):
        # Skip special tokens, punctuation, and common stop words
        if token in ["[CLS]", "[SEP]", ".", ",", "!", "?", "the", "a", "an", "is", "are", "was", "were"]:
            continue

        # Compute the similarity, then pair it with the token
        sim = cosine_similarity(vec, doc_vector)
        similarities.append((sim, token))

    # Sort by similarity and keep the top N
    top_similarities = sorted(similarities, reverse=True)[:top_n]
    return top_similarities
# Example document
document = """
Artificial intelligence is transforming industries around the world.
Machine learning algorithms can analyze vast amounts of data to identify patterns and make predictions.
Natural language processing enables computers to understand and generate human language.
Computer vision systems can recognize objects and interpret visual information.
These technologies are driving innovation in healthcare, finance, transportation, and many other sectors.
"""
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")
model.eval()
# Extract contextual keywords and print the result
top_keywords = extract_contextual_keywords(document, model, tokenizer, top_n=10)
print("Top contextual keywords:")
for similarity, token in top_keywords:
    print(f"{token}: {similarity:.4f}")
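# Optional sanity check (a minimal sketch, not part of the original script):
# for bert-base-uncased, outputs.hidden_states holds 13 tensors (the embedding
# output plus one per transformer layer), each with hidden_size 768, so the
# vectors returned for a sentence have shape (sequence_length, 768).
tokens, vectors = get_context_vectors("Machine learning is fascinating.", model, tokenizer)
print(len(tokens), vectors.shape)  # number of WordPiece tokens and (sequence_length, 768)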