7/15/2023

Combine a custom FC layer with a Hugging Face model. A handy pattern to remember and adapt when you need to modify a model.

 refer to code:


.

    def model_forward(self, pixel_values, labels):
# Origin vit encoder-decoder outputs
outputs = self.model(pixel_values=pixel_values, labels=labels, output_hidden_states=True)
# Get last hidden state
last_hidden_state = outputs.decoder_hidden_states[-1] # batch_size, seq_len, hidden_size, ex)5, 15, 768
return last_hidden_state

def fc_part(self, last_hidden_state):
    """Project decoder hidden states to vocabulary logits with the custom FC layer.

    Args:
        last_hidden_state: Tensor whose trailing dimension equals
            ``self.model.config.decoder.hidden_size``; the original author
            notes the shape as (batch_size, seq_len, hidden_size).

    Returns:
        Output of ``self.custom_decoder_fc`` applied to the flattened input,
        i.e. one row per (batch, position) pair.
    """
    hidden_size = self.model.config.decoder.hidden_size
    # reshape (not view) so non-contiguous inputs, e.g. transposed or sliced
    # tensors, are flattened instead of raising a RuntimeError.
    flat_hidden = last_hidden_state.reshape(-1, hidden_size)
    return self.custom_decoder_fc(flat_hidden)

def compute_loss(self, new_logits, labels):
    """Compute the loss between flattened logits and labels.

    Args:
        new_logits: Logits with one row per token, e.g. [70, 13] for
            (batch_size*seq_len, vocab_size) per the original note.
        labels: Target ids of shape (batch_size, seq_len); flattened here to
            (batch_size*seq_len,) so they line up with ``new_logits``.

    Returns:
        Whatever ``self.loss_f`` returns for (new_logits, flattened labels);
        the original author notes a scalar tensor.
    """
    # reshape (not view) so non-contiguous labels, e.g. a slice or transpose,
    # are flattened instead of raising a RuntimeError.
    flat_labels = labels.reshape(-1)
    return self.loss_f(new_logits, flat_labels)

def forward_pass(self, pixel_values, labels):
    """Chain model_forward -> fc_part -> compute_loss and package the results.

    Args:
        pixel_values: Image batch handed to ``self.model_forward``.
        labels: Target ids; the first two dimensions are used to un-flatten
            the logits.

    Returns:
        dict with ``'logits'`` reshaped to (labels.shape[0], labels.shape[1], -1)
        and ``'loss'`` as returned by ``self.compute_loss``.
    """
    hidden = self.model_forward(pixel_values, labels)
    flat_logits = self.fc_part(hidden)
    loss = self.compute_loss(flat_logits, labels)

    # Restore the (batch, sequence) layout so callers can index by position.
    n_batch, n_steps = labels.shape[0], labels.shape[1]
    logits = flat_logits.view(n_batch, n_steps, -1)

    return {'logits': logits, 'loss': loss}

..


forward_pass runs the process step by step.

In the end it returns the logits (computed from the last hidden state) and the loss.


Thank you.

www.marearts.com

🙇🏻‍♂️

7/13/2023

Beam search function for image-to-text or NLP inference.

  refer to code first.

.

# This beam search only supports batch size 1.
def beam_search(self, pixel_value, max_length):
    """Decode one image with beam search and return the best-scoring sequence.

    Each step calls ``self.forward_pass(pixel_value, sequences)`` and uses the
    logits at the last position. Beam scores are plain sums of token
    log-probabilities: no temperature scaling or length normalisation is
    applied, and generation does not stop early on an end-of-sequence token.

    Args:
        pixel_value: Image tensor with a leading batch dimension of exactly 1.
        max_length: Number of tokens to generate. The first token is always
            generated, so values below 1 behave like 1.

    Returns:
        (best_sequence, max_score): ``best_sequence`` is a 1-D tensor of token
        ids starting with ``self.model.config.decoder_start_token_id``;
        ``max_score`` is its summed log-probability.

    Raises:
        ValueError: if ``pixel_value`` does not have batch size 1.
    """
    if pixel_value.shape[0] != 1:
        raise ValueError(
            "beam_search only supports batch size 1, got %d" % pixel_value.shape[0]
        )

    # Inference only: avoid building an autograd graph across decoding steps.
    with torch.no_grad():
        # Start every hypothesis from the decoder start token. Shape [1, 1].
        first_sequence = torch.full(
            (1, 1),
            self.model.config.decoder_start_token_id,
            dtype=torch.long,
            device=pixel_value.device,
        )

        # Predict the first real token; only the last position's logits matter.
        outputs = self.forward_pass(pixel_value, first_sequence)
        next_token_logits = outputs['logits'][:, -1, :]  # [1, vocab_size]

        # Take the vocabulary size from the logits themselves so beam/token
        # decoding below cannot drift from a separately configured value.
        vocab_size = next_token_logits.shape[-1]
        # topk cannot return more candidates than there are tokens.
        beam_size = min(self.cfg.num_beams, vocab_size)

        # log_softmax is numerically safer than log(softmax(x)), which
        # underflows to -inf for very unlikely tokens.
        next_token_scores = F.log_softmax(next_token_logits, dim=-1)
        top_k_scores, top_k_ids = torch.topk(next_token_scores, beam_size)

        # One row per beam: [beam_size, 2] after appending the chosen tokens.
        next_sequences = first_sequence.repeat_interleave(beam_size, dim=0)
        next_sequences = torch.cat([next_sequences, top_k_ids.view(-1, 1)], dim=-1)

        # Cumulative log-probability of each beam.
        sequence_scores = top_k_scores.view(-1)

        # Every beam decodes the same image, so repeat it once per beam.
        pixel_value = pixel_value.repeat_interleave(beam_size, dim=0)

        for _ in range(max_length - 1):  # one token has already been generated
            outputs = self.forward_pass(pixel_value, next_sequences)
            next_token_scores = F.log_softmax(outputs['logits'][:, -1, :], dim=-1)

            # Score of every (beam, next token) continuation: [beam_size, vocab_size].
            new_scores = sequence_scores.unsqueeze(1) + next_token_scores
            step_vocab = new_scores.shape[-1]

            # Keep the best beam_size continuations over all beams.
            top_k_scores, top_k_indices = torch.topk(new_scores.reshape(-1), beam_size)

            # Decode the flat index back into (source beam, token id).
            beams_indices = torch.div(top_k_indices, step_vocab, rounding_mode='floor')
            token_indices = top_k_indices % step_vocab

            next_sequences = torch.cat(
                [next_sequences[beams_indices], token_indices.unsqueeze(1)], dim=-1
            )
            sequence_scores = top_k_scores

        # Pick the highest-scoring hypothesis.
        max_score, max_score_idx = torch.max(sequence_scores, 0)
        best_sequence = next_sequences[max_score_idx]

    return best_sequence, max_score

..


This is a portion of my class.

Some code is omitted (notably forward_pass), but it will work properly if you adapt it carefully.

You can also pick up some ideas from it.

Thank you.

🙇🏻‍♂️

www.marearts.com


7/07/2023

Magic Keyboard (Mac): make the Home/End keys move to the beginning or end of the line, like on Windows.

 


The solution

The only solution is to change my keyboard so I don't have to change myself.

  1. Open Terminal in MacOS
  2. Type the following commands, one per line:
cd ~/Library
# -p: do not fail if the KeyBindings directory already exists
mkdir -p KeyBindings
cd KeyBindings
# Opens (or creates) the key-binding file in the nano editor
nano DefaultKeyBinding.dict
The top part is what your Terminal should look like

3. Next, copy and paste the below key mapping into the editor that shows:

{
/* Contents of DefaultKeyBinding.dict: maps the Home and End keys (alone and
   combined with Shift and/or Ctrl) to cursor-movement and selection actions.
   In the key strings below, "$" marks Shift and "^" marks Ctrl, as the
   per-entry comments indicate. */
/* Remap Home / End keys */
/* Home Button*/
"\UF729" = "moveToBeginningOfLine:";
/* End Button */
"\UF72B" = "moveToEndOfLine:";
/* Shift + Home Button */
"$\UF729" = "moveToBeginningOfLineAndModifySelection:";
/* Shift + End Button */
"$\UF72B" = "moveToEndOfLineAndModifySelection:";
/* Ctrl + Home Button */
"^\UF729" = "moveToBeginningOfDocument:";
/* Ctrl + End Button */
"^\UF72B" = "moveToEndOfDocument:";
/* Shift + Ctrl + Home Button */
"$^\UF729" = "moveToBeginningOfDocumentAndModifySelection:";
/* Shift + Ctrl + End Button*/
"$^\UF72B" = "moveToEndOfDocumentAndModifySelection:";
}
In the editor, paste the above key mappings

4. Then save the file by pressing Control ^ + o then Control ^ + x to exit.

5. Restart your Mac for the changes to take effect.

7/04/2023

CrossEntropyLoss example code using inputs shaped like NLP token sequences.

 Refer to code

.

import torch
import torch.nn as nn

# Toy setup: batch size 2, sequence length 3, vocabulary size 5, so the raw
# model output has shape (batch_size, seq_len, vocab_size).
ascending = [0.1, 0.2, 0.3, 0.4, 0.5]
descending = [0.5, 0.4, 0.3, 0.2, 0.1]
logits = torch.tensor([
    [ascending, descending, ascending],
    [descending, ascending, descending],
])
# CrossEntropyLoss expects (N, C): fold batch and sequence into one axis,
# giving N = batch_size * seq_len rows of C = vocab_size scores.
logits = logits.reshape(-1, logits.size(-1))

# Example targets of shape (batch_size, seq_len), flattened to (N,) so each
# label lines up with one row of logits.
labels = torch.tensor([[0, 1, 2], [2, 1, 0]]).flatten()

loss_function = nn.CrossEntropyLoss()
loss = loss_function(logits, labels)

print(loss)

..




In this example, logits and labels are explicitly defined tensors. The values in logits represent the output from your model for each token in the sequence for each example in your batch, and the labels tensor represents the correct labels or classes for each of these tokens. nn.CrossEntropyLoss() is then used to compute the loss between the predicted logits and the actual labels.




Thank you.

🙇🏻‍♂️

Tokeniser example source code using "BertWordPieceTokenizer", "sentencepiece"


BertWordPieceTokenizer tokenizer code

.

import os
from tokenizers import BertWordPieceTokenizer

# Train a WordPiece vocabulary on the corpus and write it to hf_model_path.
tokenizer = BertWordPieceTokenizer(strip_accents=False, lowercase=False)

corpus_file = ['./ratings.txt']  # data path
vocab_size = 32000  # vocabulary size; 32,000 is commonly said to work well
limit_alphabet = 6000  # cap on how many initial tokens are kept before merging
output_path = 'hugging_%d'%(vocab_size)
min_frequency = 5  # minimum number of occurrences of a word
hf_model_path = './'

train_options = dict(
    files=corpus_file,
    vocab_size=vocab_size,
    min_frequency=min_frequency,
    limit_alphabet=limit_alphabet,
    show_progress=True,
)
tokenizer.train(**train_options)

tokenizer.save_model(hf_model_path)

..


BertWordPiece Tokenizer test

.

from transformers import BertTokenizerFast

# Load the vocabulary trained above and tokenize one sample sentence.
hf_model_path = './'
# The training script used lowercase=False; BertTokenizerFast's keyword for
# that setting is `do_lower_case` (a `lowercase` keyword is not one of its options).
tokenizer = BertTokenizerFast.from_pretrained(hf_model_path, strip_accents=False,
                                              do_lower_case=False)

# Sample Korean sentence (restored from mis-encoded text so that it matches
# the expected output listed below).
text = "네이버 영화 평가 문장으로 토크나이저"
tokenized_input_for_pytorch = tokenizer(text, return_tensors='pt')

# [0] selects the single sentence in the batch returned by return_tensors='pt'.
input_ids = tokenized_input_for_pytorch['input_ids'].tolist()[0]
print("Tokens (str) : {}".format(tokenizer.convert_ids_to_tokens(input_ids)))
print("Tokens (int) : {}".format(input_ids))
print("Tokens (attn_mask): {}\n".format(tokenized_input_for_pytorch['attention_mask'].tolist()[0]))

# Tokens (str) : ['[CLS]', '네이버', '영화', '평가', '문', '##장', '##으로', '토', '##크', '##나이', '##저', '[SEP]']
# Tokens (int) : [2, 6818, 5834, 6947, 1528, 3340, 5842, 2899, 3390, 8801, 3755, 3]
# Tokens (attn_mask): [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

..


Sentence piece Tokenizer

.

import sentencepiece as spm
import os

# Train a SentencePiece model on the corpus; output files are written with
# the prefix sentencepiece/tokenizer_<vocab_size>.
input_file = './ratings.txt'
vocab_size = 32000

sp_model_root = 'sentencepiece'
# exist_ok avoids the check-then-create race of isdir() followed by mkdir().
os.makedirs(sp_model_root, exist_ok=True)
sp_model_name = 'tokenizer_%d' % (vocab_size)
sp_model_path = os.path.join(sp_model_root, sp_model_name)
model_type = 'unigram'  # 'unigram' or 'bpe'
character_coverage = 1.0  # default is 0.9995

# Comma-separated symbols passed as --user_defined_symbols: the BERT-style
# specials, then [UNK0]..[UNK9] and [unused0]..[unused99].
user_defined_symbols = ','.join(
    ['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]', '[BOS]', '[EOS]']
    + ['[UNK%d]' % i for i in range(10)]
    + ['[unused%d]' % i for i in range(100)]
)

input_argument = '--input=%s --model_prefix=%s --vocab_size=%s --user_defined_symbols=%s --model_type=%s --character_coverage=%s'
cmd = input_argument%(input_file, sp_model_path, vocab_size,user_defined_symbols, model_type, character_coverage)

spm.SentencePieceTrainer.Train(cmd)

..


Sentence piece Tokenizer test

.

import sentencepiece as spm
# Load the trained SentencePiece model and tokenize one sample sentence.
sp = spm.SentencePieceProcessor()
sp.Load('{}.model'.format('./sentencepiece/tokenizer_32000'))

# Sample Korean sentence (restored from mis-encoded text so that it matches
# the expected output listed below).
text = "네이버 영화 평가 문장으로 토크나이저"
tokens = sp.encode_as_pieces(text)
ids = sp.encode_as_ids(text)

print("Tokens (str) : {}".format(tokens))
print("Tokens (int) : {}".format(ids))

# Tokens (str) : ['▁네이버', '▁영화', '▁평가', '▁문', '장', '으로', '▁', '토크', '나이', '저']
# Tokens (int) : [1209, 126, 2353, 3552, 412, 166, 123, 22627, 6361, 725]


..


Thank you.

🙇🏻‍♂️


7/03/2023

Generate simple number text image with black background, white foreground

Code

 .

import cv2
import numpy as np
import random
import os
from tqdm import tqdm

def create_dir(path):
    """Create directory ``path`` (including missing parents) if it does not exist.

    Calling it again for an existing directory is a no-op.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    # exist_ok=True removes the race between checking for the path and creating it.
    os.makedirs(path, exist_ok=True)

def generate_image(output_dir, image_name, text, height=100, width=300,
                   font_scale=1, thickness=2):
    """Draw ``text`` in white, centred on a black image, and save it to disk.

    Args:
        output_dir: Directory the image is written into.
        image_name: File name of the image inside ``output_dir``.
        text: String to render.
        height: Image height in pixels.
        width: Image width in pixels.
        font_scale: Scale factor passed to OpenCV for the Hershey Simplex font.
        thickness: Stroke thickness, used both to measure and to draw the text.

    Raises:
        OSError: if ``cv2.imwrite`` reports that the file was not written.
    """
    # Black 3-channel canvas.
    image = np.zeros((height, width, 3), np.uint8)

    font = cv2.FONT_HERSHEY_SIMPLEX
    color = (255, 255, 255)  # white

    # Measure with the same thickness used for drawing so centring is exact.
    text_width, text_height = cv2.getTextSize(text, font, font_scale, thickness)[0]

    # putText takes the bottom-left corner of the text as its origin.
    text_x = (image.shape[1] - text_width) // 2
    text_y = (image.shape[0] + text_height) // 2

    cv2.putText(image, text, (text_x, text_y), font, font_scale, color, thickness)

    out_path = os.path.join(output_dir, image_name)
    # imwrite signals failure through its return value; do not ignore it.
    if not cv2.imwrite(out_path, image):
        raise OSError("Failed to write image: %s" % out_path)

if __name__ == "__main__":
    # How many sample images to produce.
    num_images = 100

    # Destination folder, created on demand.
    output_dir = "./number_images"
    create_dir(output_dir)

    # Each image shows a random integer and is named after that integer.
    for _ in tqdm(range(num_images), desc="Generating images"):
        number = random.randint(0, 9999999999)  # at most 10 digits
        label = str(number)
        generate_image(output_dir, f"{label}.png", label)

..

Each image's file name is the same as the number drawn in the image.


Sample images:



Thank you.
🙇🏻‍♂️


7/02/2023

conda essential command

 


Conda


Create a new env named "exploratory"

  • conda create --name exploratory python=3.8


Activate env and Deactivate

  • conda activate exploratory
  • conda deactivate



Export installed package lists and setup information

  • conda env export --name exploratory > conda_env.yml



Install packages from the YAML file and prune those that are no longer needed

  • conda env update --file conda_env.yml --prune