from functools import lru_cache
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
class AutoComplete:
    def __init__(self, model_name="gpt2"):
        """Initialize the auto-complete system."""
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name, padding_side="left")
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.model.eval()  # Set to evaluation mode

    def get_completion(self, text, max_length=50):
        """Generate a completion for the input text."""
        print("Generating completion for:", text)
        # Encode the input text
        inputs = self.tokenizer(text, add_special_tokens=False, return_tensors="pt")
        input_ids = inputs["input_ids"].to(self.device)
        attn_masks = inputs["attention_mask"].to(self.device)
        # Generate the completion
        with torch.no_grad():
            outputs = self.model.generate(
                input_ids,
                attention_mask=attn_masks,
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7,
            )
        # Decode, then strip the prompt so only the new text remains
        full_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        completion = full_text[len(text):]
        return completion
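
# A minimal usage sketch for the base class. The prompt string and the
# function name are illustrative additions, not part of the original
# article; the function is defined but not called so the module stays
# cheap to import.
def demo_single_completion():
    ac = AutoComplete()
    print(ac.get_completion("The weather today is", max_length=30))
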
class CachedAutoComplete(AutoComplete):
    def __init__(self, cache_size=1000, **kwargs):
        """Initialize with caching support."""
        super().__init__(**kwargs)
        # Wrap the bound method so repeated prompts are served from the cache
        self.get_completion = lru_cache(maxsize=cache_size)(
            self.get_completion
        )
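
# Caching caveat, shown as a small sketch (demo_cache_behavior is a
# hypothetical helper, not from the original article): lru_cache keys on
# the argument values, so a repeated prompt returns the cached string
# verbatim even though do_sample=True, and cache_info() on the wrapped
# method reports the hit/miss counts.
def demo_cache_behavior():
    cac = CachedAutoComplete(cache_size=16)
    first = cac.get_completion("Once upon a time")
    second = cac.get_completion("Once upon a time")  # cache hit, not re-sampled
    assert first == second
    print(cac.get_completion.cache_info())
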
class OptimizedAutoComplete(CachedAutoComplete):
    def __init__(self, **kwargs):
        """Initialize with optimizations."""
        super().__init__(**kwargs)
        # GPT-2 has no pad token, so reuse EOS for batched padding
        self.tokenizer.pad_token = self.tokenizer.eos_token
        if self.device == "cuda":
            self.model = self.model.half()  # Use FP16 on GPU
        self.model.eval()  # Keep eval mode after the dtype change

    def preprocess_batch(self, texts):
        """Efficiently tokenize multiple texts."""
        # Tokenize all texts at once, left-padding to a common length
        inputs = self.tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        return inputs.to(self.device)

    def generate_batch(self, texts, max_length=50):
        """Generate completions for multiple texts."""
        # Preprocess the batch
        inputs = self.preprocess_batch(texts)
        # Generate completions
        with torch.no_grad():
            outputs = self.model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7,
            )
        # Decode the full sequences
        completions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        # Keep only the newly generated text after each prompt
        results = []
        for text, completion in zip(texts, completions):
            results.append(completion[len(text):])
        return results
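
# Note that max_length budgets prompt plus completion, so long prompts can
# leave almost no room for new tokens. Below is a sketch of the alternative
# using max_new_tokens (a standard transformers generate() argument); the
# helper name is hypothetical, and ac should be an OptimizedAutoComplete.
def generate_batch_fixed_budget(ac, texts, max_new_tokens=30):
    inputs = ac.preprocess_batch(texts)
    with torch.no_grad():
        outputs = ac.model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,  # budget counts only new tokens
            pad_token_id=ac.tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.7,
        )
    return ac.tokenizer.batch_decode(outputs, skip_special_tokens=True)
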
# Example: Optimized batch completion
optimized_complete = OptimizedAutoComplete()
texts = [
    "Machine learning is",
    "Deep neural networks can",
    "The training process involves",
]
completions = optimized_complete.generate_batch(texts)
for text, completion in zip(texts, completions):
    print(f"\nInput: {text}")
    print(f"Completion: {completion}")
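
# Optional timing sketch (hypothetical helper; the numbers depend entirely
# on hardware): compare one batched call against sequential single-prompt
# calls. Use fresh prompts, since the inherited lru_cache would otherwise
# skew the sequential timing.
def time_batch_vs_sequential(ac, texts):
    import time
    start = time.perf_counter()
    ac.generate_batch(texts)
    batch_s = time.perf_counter() - start
    start = time.perf_counter()
    for t in texts:
        ac.get_completion(t)
    seq_s = time.perf_counter() - start
    print(f"Batched: {batch_s:.2f}s, sequential: {seq_s:.2f}s")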