Showing posts with label hugging face. Show all posts
Showing posts with label hugging face. Show all posts

7/23/2024

download all files in specific folder from hugging face model

 refer to code:


.

# Download all files from the IP-Adapter/sdxl_models folder
from huggingface_hub import snapshot_download

# Download the sdxl_models folder and its contents
snapshot_download(
repo_id="h94/IP-Adapter",
repo_type="model",
local_dir="./IP-Adapter_sdxl_models",
allow_patterns=["sdxl_models/*"]
)

..


It download all files under the sdxl_models folder.


Thank you.

7/19/2024

Download specific model from hugging face

 refer to code

.

import os
import shutil
from huggingface_hub import hf_hub_download

# Repository name
repo_id = "h94/IP-Adapter"
# Directory to save the downloaded files
local_directory = "./models/image_encoder"

# Ensure the local directory exists
os.makedirs(local_directory, exist_ok=True)

# List of files to download
files_to_download = [
"models/image_encoder/config.json",
"models/image_encoder/model.safetensors",
"models/image_encoder/pytorch_model.bin"
]

# Download each file and move it to the desired directory
for file in files_to_download:
file_path = hf_hub_download(repo_id=repo_id, filename=file)
# Construct the destination path
dest_path = os.path.join(local_directory, os.path.basename(file))
# Move the file to the destination path
shutil.move(file_path, dest_path)
print(f"Downloaded and moved to {dest_path}")

..


Thank you.


other option is

file_path = hf_hub_download(repo_id=repo_id, filename=file, cache_dir=local_directory, force_download=True)


7/15/2023

combine costum fc with hugging face model, good to remember and modify for modifications

 refer to code:


.

    def model_forward(self, pixel_values, labels):
# Origin vit encoder-decoder outputs
outputs = self.model(pixel_values=pixel_values, labels=labels, output_hidden_states=True)
# Get last hidden state
last_hidden_state = outputs.decoder_hidden_states[-1] # batch_size, seq_len, hidden_size, ex)5, 15, 768
return last_hidden_state

def fc_part(self, last_hidden_state):
# Reshape the last hidden state
reshaped_logits = last_hidden_state.view(-1, self.model.config.decoder.hidden_size) # batch_size*seq_len, hidden_size
# Apply the fully connected layer
new_logits = self.custom_decoder_fc(reshaped_logits) # batch_size*seq_len, vocab_size
return new_logits

def compute_loss(self, new_logits, labels):
# Reshape labels to match logits dimension
reshaped_labels = labels.view(-1) #batch_size, seq_len -> batch_size*seq_len
# Calculate loss
# [batch_size*seq_len, vocab_size] vs [batch_size*seq_len] #ex) [70, 13] vs [70]
loss = self.loss_f(new_logits, reshaped_labels) #scalar tensor
return loss

def forward_pass(self, pixel_values, labels):
last_hidden_state = self.model_forward(pixel_values, labels) # batch_size, seq_len, hidden_size
new_logits = self.fc_part(last_hidden_state) # batch_size*seq_len, vocab_size
loss = self.compute_loss(new_logits, labels) # scalar tensor
# Reshape new_logits to match labels dimension
new_logits = new_logits.view(labels.shape[0], labels.shape[1], -1) # bathc_size, seq_len, vocab_size

return {'logits':new_logits, 'loss':loss}

..


forward_pass do process step by step.

And in the end return last hidden states logits and loss.


Thank you.

www.marearts.com

🙇🏻‍♂️

7/13/2023

Beam search function for image to text or nlp inference purpose.

  refer to code first.

.

#this beam search only deal with batch size 1
def beam_search(self, pixel_value, max_length):
beam_size = self.cfg.num_beams
alpha = self.cfg.beam_alpha # Length normalization coefficient
temperature = self.cfg.beam_temp # Temperature for softmax

# Initialize input ids as bos_token_id
first_sequence = torch.full((pixel_value.shape[0], 1), self.model.config.decoder_start_token_id).to(pixel_value.device)
# ic(first_sequence) #tensor([[1]])

# Predict second token id
outputs = self.forward_pass(pixel_value, first_sequence)
# ic(outputs.keys()) #dict_keys(['logits', 'loss'])
# We only need the logits corresponding to the last prediction
next_token_logits = outputs['logits'][:, -1, :]
# ic(outputs['logits'].shape) #[1, 1, 13] batch, seq, vocab_size
# ic(outputs['logits'][:, -1, :].shape) #[1, 13] batch, vocab_size

# Apply temperature
# ic(next_token_logits)
# [-5.0641, 32.7805, -2.6743, -4.6459, 0.8130, -1.3443, -1.2016, -4.0770,
# -3.5401, 0.2425, -5.3685, -1.8074, -5.2606]],
# next_token_logits /= temperature
# ic(next_token_logits)
# [-7.2344, 46.8292, -3.8204, -6.6370, 1.1614, -1.9205, -1.7166, -5.8243,
# -5.0573, 0.3464, -7.6693, -2.5820, -7.5152]],

# Select top k tokens
next_token_probs = F.softmax(next_token_logits, dim=-1)
top_k_probs, top_k_ids = torch.topk(next_token_probs, beam_size)
# ic(F.softmax(next_token_logits, dim=-1))
# tensor([[3.3148e-24, 1.0000e+00, 1.0072e-22, 6.0241e-24, 1.4680e-20, 6.7340e-22,
# 8.2570e-22, 1.3579e-23, 2.9239e-23, 6.4976e-21, 2.1458e-24, 3.4751e-22,
# 2.5034e-24]]
# ic(top_k_probs, top_k_ids)
# top_k_probs: tensor([[1.]], grad_fn=<TopkBackward0>)
# top_k_ids: tensor([[1]])

# Prepare next sequences. Each top 1 token is appended to the first_sequence
# ic(first_sequence.shape) #[1, 1]
next_sequences = first_sequence.repeat_interleave(beam_size, dim=0)
# ic(next_sequences.shape) #[10, 1] 10 is beam size, 1 is seq length
next_sequences = torch.cat([next_sequences, top_k_ids.view(-1, 1)], dim=-1)
# ic(next_sequences.shape) #[10, 2] 10 is beam size, 2 is seq length
# ic(next_sequences)

# Also prepare a tensor to hold the cumulative scores of each sequence, or the sum of the log probabilities of each token in the sequence
sequence_scores = (torch.log(top_k_probs).view(-1)) #/ (1 + 1) ** alpha
# ic(sequence_scores) #[ 0.0000, -15.9837]

# We'll need to repeat the pixel_values for each sequence in each beam
pixel_value = pixel_value.repeat_interleave(beam_size, dim=0)
# ic(pixel_value.shape) #[10, 3, 224, 224], 10 is beam size, 3 is channel, 224 is image size

for idx in range(max_length - 1): # We already generated one token
# ic(idx, '--------------------')
outputs = self.forward_pass(pixel_value, next_sequences)
next_token_logits = outputs['logits'][:, -1, :]
# ic(outputs['logits'].shape, outputs['logits']) #[2, 2, 13], batch, seq, vocab_size
# ic(next_token_logits.shape, next_token_logits)

# Apply temperature
# next_token_logits /= temperature

# Convert logits to probabilities and calculate new scores
next_token_probs = F.softmax(next_token_logits, dim=-1)
# ic(next_token_probs.shape, next_token_probs) #[2, 13], batch, vocab_size
next_token_scores = torch.log(next_token_probs)
# ic(next_token_scores.shape, next_token_scores) #[2, 13], batch, vocab_size

new_scores = sequence_scores.unsqueeze(1) + next_token_scores
# ic(sequence_scores.unsqueeze(1))
# ic(new_scores.shape, new_scores) #[2, 13], batch, vocab_size

# Select top k sequences
# ic(new_scores.view(-1), new_scores.view(-1).shape)
top_k_scores, top_k_indices = torch.topk(new_scores.view(-1), beam_size)

# ic(top_k_scores, top_k_indices)

# Get the beam and token that each of the top k sequences comes from
beams_indices = top_k_indices // self.cfg.num_tokens
token_indices = top_k_indices % self.cfg.num_tokens
# ic(beams_indices, token_indices)

# Update pixel values, sequences, and scores
# pixel_value = pixel_value[beams_indices]
# ic(next_sequences)
next_sequences = next_sequences[beams_indices]
# ic(next_sequences)
next_sequences = torch.cat([next_sequences, token_indices.unsqueeze(1)], dim=-1)
# ic(next_sequences)
sequence_scores = top_k_scores #/ (idx + 3) ** alpha

# ic('-------------------')
# if idx > 2: break

# Select the best sequence
max_score, max_score_idx = torch.max(sequence_scores, 0)
# Select the sequence with the highest score
best_sequence = next_sequences[max_score_idx]

# ic(best_sequence, max_score)
return best_sequence, max_score

..


This is portion of my class. 

There are omitted code especially forward_pass however the code will work properly if you adapt this carefully. 

And you can also capture some idea from here.

Thank you.

🙇🏻‍♂️

www.marearts.com