7/15/2023

Combine a custom fully connected (FC) layer with a Hugging Face model. This is a handy pattern to remember and adapt for your own modifications.

refer to code:

    def model_forward(self, pixel_values, labels):
        # Origin vit encoder-decoder outputs
        outputs = self.model(pixel_values=pixel_values, labels=labels, output_hidden_states=True)
        # Get last hidden state
        last_hidden_state = outputs.decoder_hidden_states[-1] # batch_size, seq_len, hidden_size, ex)5, 15, 768
        return last_hidden_state

    def fc_part(self, last_hidden_state):
        """Project decoder hidden states through the custom fully connected head.

        Args:
            last_hidden_state: tensor whose trailing dimension equals
                ``self.model.config.decoder.hidden_size``; ``forward_pass``
                passes a (batch_size, seq_len, hidden_size) tensor.

        Returns:
            The output of ``self.custom_decoder_fc`` applied to the flattened
            input, i.e. one row per token: (batch_size * seq_len, vocab_size).
        """
        hidden_size = self.model.config.decoder.hidden_size
        # reshape() instead of view(): view() raises on non-contiguous tensors
        # (e.g. after a transpose/permute), reshape() copies only when needed.
        flat_hidden = last_hidden_state.reshape(-1, hidden_size)
        # Apply the fully connected layer to every token position at once.
        return self.custom_decoder_fc(flat_hidden)

    def compute_loss(self, new_logits, labels):
        """Compute the loss between flattened logits and the target token ids.

        Args:
            new_logits: logits with one row per token,
                (batch_size * seq_len, vocab_size), e.g. [70, 13].
            labels: target ids of shape (batch_size, seq_len); they are
                flattened here to (batch_size * seq_len,), e.g. [70].

        Returns:
            The value returned by ``self.loss_f`` (a scalar tensor).
        """
        # reshape() instead of view(): view() raises on non-contiguous labels
        # (e.g. a transposed or sliced tensor), reshape() copies only when needed.
        flat_labels = labels.reshape(-1)
        return self.loss_f(new_logits, flat_labels)

    def forward_pass(self, pixel_values, labels):
        """Run the model, the custom FC head and the loss in sequence.

        Returns:
            dict with
              'logits': (batch_size, seq_len, vocab_size) tensor,
              'loss':   scalar tensor from ``compute_loss``.
        """
        # batch_size, seq_len, hidden_size
        hidden = self.model_forward(pixel_values, labels)
        # batch_size*seq_len, vocab_size
        flat_logits = self.fc_part(hidden)
        # The loss is computed on the flattened logits, before restoring the shape.
        loss = self.compute_loss(flat_logits, labels)

        # Restore the (batch_size, seq_len, vocab_size) layout to line up with labels.
        batch_size, seq_len = labels.shape[0], labels.shape[1]
        logits = flat_logits.view(batch_size, seq_len, -1)

        return {'logits': logits, 'loss': loss}

forward_pass runs the whole process step by step.

In the end, it returns the logits computed from the last hidden state, together with the loss.

Thank you.

www.marearts.com

🙇🏻‍♂️

7/13/2023

A beam search function for image-to-text or other NLP inference.

refer to code first.

# NOTE: this beam search only handles batch size 1
    def beam_search(self, pixel_value, max_length):
        """Decode a single image with beam search and return the best sequence.

        The search keeps ``self.cfg.num_beams`` hypotheses. Each hypothesis is
        scored by the sum of the log-probabilities of its tokens; no length
        normalisation or temperature scaling is applied. Decoding always runs
        for the full ``max_length`` steps (there is no early stop on an
        end-of-sequence token).

        Args:
            pixel_value: image tensor with batch size 1, e.g. [1, 3, 224, 224].
            max_length: number of tokens to generate after the start token
                (at least one token is always generated).

        Returns:
            (best_sequence, max_score): the 1-D tensor of token ids of the
            highest-scoring hypothesis (including the decoder start token) and
            its cumulative log-probability as a scalar tensor.

        Raises:
            ValueError: if ``pixel_value`` does not have batch size 1. The
                top-k below is taken over all rows at once, so hypotheses of
                different images would be mixed.

        NOTE: no ``torch.no_grad()`` is applied here; wrap the call in it for
        pure inference to avoid building the autograd graph.
        """
        if pixel_value.shape[0] != 1:
            raise ValueError(
                "beam_search only supports batch size 1, got batch size "
                f"{pixel_value.shape[0]}"
            )

        beam_size = self.cfg.num_beams

        # Every hypothesis starts from the decoder start token: [1, 1]
        first_sequence = torch.full(
            (1, 1),
            self.model.config.decoder_start_token_id,
            dtype=torch.long,
            device=pixel_value.device,
        )

        # Predict the first real token; only the logits of the last position matter.
        outputs = self.forward_pass(pixel_value, first_sequence)
        next_token_logits = outputs['logits'][:, -1, :]  # [1, vocab_size]

        # log_softmax instead of log(softmax(x)): very unlikely tokens underflow
        # to probability 0 and would otherwise get a score of -inf.
        next_token_scores = F.log_softmax(next_token_logits, dim=-1)

        # Seed the beams with the top-k first tokens.
        top_k_scores, top_k_ids = torch.topk(next_token_scores, beam_size)

        # One row per beam: [beam_size, 2]
        next_sequences = first_sequence.repeat_interleave(beam_size, dim=0)
        next_sequences = torch.cat([next_sequences, top_k_ids.view(-1, 1)], dim=-1)

        # Cumulative log-probability of each hypothesis: [beam_size]
        sequence_scores = top_k_scores.view(-1)

        # The same image is fed once per beam: [beam_size, C, H, W]
        pixel_value = pixel_value.repeat_interleave(beam_size, dim=0)

        for _ in range(max_length - 1):  # one token has already been generated
            outputs = self.forward_pass(pixel_value, next_sequences)
            next_token_logits = outputs['logits'][:, -1, :]  # [beam_size, vocab_size]
            next_token_scores = F.log_softmax(next_token_logits, dim=-1)

            # Score of every (beam, next token) continuation: [beam_size, vocab_size]
            new_scores = sequence_scores.unsqueeze(1) + next_token_scores

            # Use the real width of the score matrix rather than a config value,
            # so decoding the flat index below can never disagree with the logits.
            vocab_size = new_scores.shape[-1]

            # Keep the best beam_size continuations over all beams.
            top_k_scores, top_k_indices = torch.topk(new_scores.reshape(-1), beam_size)

            # Recover which beam and which token each flat index refers to.
            beams_indices = top_k_indices // vocab_size
            token_indices = top_k_indices % vocab_size

            # Extend the surviving hypotheses. pixel_value needs no reordering
            # because every row holds the same image.
            next_sequences = torch.cat(
                [next_sequences[beams_indices], token_indices.unsqueeze(1)], dim=-1
            )
            sequence_scores = top_k_scores

        # Select the hypothesis with the highest cumulative score.
        max_score, max_score_idx = torch.max(sequence_scores, 0)
        best_sequence = next_sequences[max_score_idx]

        return best_sequence, max_score

This is a portion of my class.

Some code is omitted, most notably forward_pass, but the function will work properly if you adapt it carefully to your own class.

And you can also capture some idea from here.

Thank you.

🙇🏻‍♂️

www.marearts.com

7/07/2023

Magic Keyboard (Mac): make the Home/End keys move to the beginning or end of the line, as they do on Windows.

The solution

The only solution is to change my keyboard so I don’t have to change me.

1. Open Terminal in macOS.
2. Type the following commands, one per line:

cd ~/Library
mkdir -p KeyBindings
cd KeyBindings
nano DefaultKeyBinding.dict

The top part is what your Terminal should look like

3. Next, copy and paste the below key mapping into the editor that shows:

{
/* Key prefixes used below: "$" = Shift, "^" = Ctrl;
   "\UF729" is the Home key and "\UF72B" is the End key. */
/* Remap Home / End keys */
/* Home Button*/
"\UF729" = "moveToBeginningOfLine:"; 
/* End Button */
"\UF72B" = "moveToEndOfLine:"; 
/* Shift + Home Button */
"$\UF729" = "moveToBeginningOfLineAndModifySelection:"; 
/* Shift + End Button */
"$\UF72B" = "moveToEndOfLineAndModifySelection:"; 
/* Ctrl + Home Button */
"^\UF729" = "moveToBeginningOfDocument:"; 
/* Ctrl + End Button */
"^\UF72B" = "moveToEndOfDocument:"; 
 /* Shift + Ctrl + Home Button */
"$^\UF729" = "moveToBeginningOfDocumentAndModifySelection:";
/* Shift + Ctrl + End Button*/
"$^\UF72B" = "moveToEndOfDocumentAndModifySelection:"; 
}

In the editor, paste the above key mappings

4. Then save the file by pressing Control (^) + O, then Enter to confirm the file name, and exit with Control (^) + X.

5. Restart your Mac for the changes to take effect.

7/04/2023

CrossEntropyLoss example code using inputs shaped like NLP token predictions.

Refer to code

import torch
import torch.nn as nn

# Toy setup: batch size 2, sequence length 3, vocabulary size 5.
# The model output therefore has shape (batch size, sequence length, vocab size).
ascending = [0.1, 0.2, 0.3, 0.4, 0.5]
descending = [0.5, 0.4, 0.3, 0.2, 0.1]

logits = torch.tensor([
    [ascending, descending, ascending],
    [descending, ascending, descending],
])
# CrossEntropyLoss expects 2D logits (N, C): N = batch_size*seq_length, C = vocab_size.
vocab_size = logits.shape[-1]
logits = logits.view(-1, vocab_size)

# Example target ids with shape (batch size, sequence length),
# flattened to 1D (N) so they line up with the logits rows.
labels = torch.tensor([[0, 1, 2], [2, 1, 0]]).view(-1)

# Build the criterion and evaluate it on the flattened tensors.
loss_function = nn.CrossEntropyLoss()
loss = loss_function(logits, labels)

print(loss)  # Show the resulting scalar loss

In this example, logits and labels are explicitly defined tensors. The values in logits represent the output from your model for each token in the sequence for each example in your batch, and the labels tensor represents the correct labels or classes for each of these tokens. nn.CrossEntropyLoss() is then used to compute the loss between the predicted logits and the actual labels.

Thank you.

🙇🏻‍♂️

Tokenizer example source code using "BertWordPieceTokenizer" and "sentencepiece"

BertWordPieceTokenizer training code

import os
from tokenizers import BertWordPieceTokenizer

# Cased WordPiece tokenizer: accents and letter case are left untouched.
tokenizer = BertWordPieceTokenizer(lowercase=False, strip_accents=False)

corpus_file = ['./ratings.txt']  # data path
vocab_size = 32000  # vocabulary size; around 32,000 is commonly considered a good choice
limit_alphabet = 6000  # cap on how many initial tokens are kept before merges are performed
output_path = 'hugging_%d' % (vocab_size)
min_frequency = 5  # minimum number of occurrences of a word
hf_model_path = './'

# Collect the training options in one place, then train on the corpus.
training_options = dict(
    vocab_size=vocab_size,
    min_frequency=min_frequency,
    limit_alphabet=limit_alphabet,
    show_progress=True,
)
tokenizer.train(files=corpus_file, **training_options)

# Write the trained vocabulary into hf_model_path.
tokenizer.save_model(hf_model_path)

BertWordPiece Tokenizer test

from transformers import BertTokenizerFast

# Load the vocabulary saved by the training step and tokenize a sample sentence.
hf_model_path = './'
tokenizer = BertTokenizerFast.from_pretrained(
    hf_model_path,
    strip_accents=False,
    # BertTokenizerFast's case switch is `do_lower_case`. `lowercase` is the
    # BertWordPieceTokenizer spelling and is not a BertTokenizerFast option,
    # so the tokenizer would otherwise keep its default (lower-casing enabled).
    do_lower_case=False,
)

text = "네이버 영화 평가 문장으로 토크나이저"
tokenized_input_for_pytorch = tokenizer(text, return_tensors='pt')

# The batch holds a single sentence, so take the first row of each tensor.
input_ids = tokenized_input_for_pytorch['input_ids'].tolist()[0]
attention_mask = tokenized_input_for_pytorch['attention_mask'].tolist()[0]

print("Tokens (str)      : {}".format([tokenizer.convert_ids_to_tokens(s) for s in input_ids]))
print("Tokens (int)      : {}".format(input_ids))
print("Tokens (attn_mask): {}\n".format(attention_mask))

# Example output:
# Tokens (str)      : ['[CLS]', '네이버', '영화', '평가', '문', '##장', '##으로', '토', '##크', '##나이', '##저', '[SEP]']
# Tokens (int)      : [2, 6818, 5834, 6947, 1528, 3340, 5842, 2899, 3390, 8801, 3755, 3]
# Tokens (attn_mask): [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

Sentence piece Tokenizer

import sentencepiece as spm
import os

# Train a SentencePiece model on the corpus and store it under ./sentencepiece/.
input_file = './ratings.txt'
vocab_size = 32000

sp_model_root = 'sentencepiece'
# exist_ok avoids the separate "does it exist?" check.
os.makedirs(sp_model_root, exist_ok=True)
sp_model_name = 'tokenizer_%d' % (vocab_size)
sp_model_path = os.path.join(sp_model_root, sp_model_name)
model_type = 'unigram'  # unigram, bpe
character_coverage = 1.0  # default=0.9995

# Extra symbols passed via --user_defined_symbols: the BERT-style special
# tokens, then [UNK0]..[UNK9] and [unused0]..[unused99]. Generating the list
# yields the same comma-separated string as spelling out all 117 entries.
user_defined_symbols = ','.join(
    ['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]', '[BOS]', '[EOS]']
    + ['[UNK%d]' % i for i in range(10)]
    + ['[unused%d]' % i for i in range(100)]
)

# SentencePieceTrainer.Train takes its options as one command-line style string.
input_argument = '--input=%s --model_prefix=%s --vocab_size=%s --user_defined_symbols=%s --model_type=%s --character_coverage=%s'
cmd = input_argument%(input_file, sp_model_path, vocab_size,user_defined_symbols, model_type, character_coverage)

spm.SentencePieceTrainer.Train(cmd)

Sentence piece Tokenizer test

import sentencepiece as spm
# Load the trained model (the training step writes "<model_prefix>.model").
model_prefix = './sentencepiece/tokenizer_32000'
sp = spm.SentencePieceProcessor()
sp.Load('{}.model'.format(model_prefix))

# Encode one sample sentence both as pieces and as ids.
text = "네이버 영화 평가 문장으로 토크나이저"
tokens = sp.encode_as_pieces(text)
ids = sp.encode_as_ids(text)

for template, value in (
    ("Tokens (str)      : {}", tokens),
    ("Tokens (int)      : {}", ids),
):
    print(template.format(value))

# Example output:
# Tokens (str)      : ['▁네이버', '▁영화', '▁평가', '▁문', '장', '으로', '▁', '토크', '나이', '저']
# Tokens (int)      : [1209, 126, 2353, 3552, 412, 166, 123, 22627, 6361, 725]

Thank you.

🙇🏻‍♂️

7/03/2023

Generate simple number-text images with a black background and white foreground.

Code

import cv2
import numpy as np
import random
import os
from tqdm import tqdm

def create_dir(path):
    """Create directory *path* (including missing parents) if it does not exist.

    Calling it again for an existing directory is a no-op. ``exist_ok=True``
    replaces the separate existence check, so there is no window in which the
    directory can appear between the check and the creation.

    Raises:
        FileExistsError: if *path* exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def generate_image(output_dir, image_name, text, height=100, width=300,
                   font_scale=1, thickness=2):
    """Render *text* in white, centred on a black image, and save it.

    Args:
        output_dir: directory the image is written to.
        image_name: file name of the image; passed to ``cv2.imwrite`` joined
            with *output_dir*.
        text: string to draw.
        height: image height in pixels.
        width: image width in pixels.
        font_scale: scale factor passed to ``cv2.getTextSize``/``cv2.putText``.
        thickness: stroke thickness passed to the same two calls.

    Raises:
        OSError: if ``cv2.imwrite`` reports that the image was not written
            (it returns False instead of raising).
    """
    # Black 3-channel canvas
    image = np.zeros((height, width, 3), np.uint8)

    font = cv2.FONT_HERSHEY_SIMPLEX
    color = (255, 255, 255)  # white

    # Measure the rendered text so it can be centred.
    (text_width, text_height), _baseline = cv2.getTextSize(
        text, font, font_scale, thickness
    )

    # putText anchors the text at its bottom-left corner, hence "+ text_height".
    text_x = (width - text_width) // 2
    text_y = (height + text_height) // 2

    cv2.putText(image, text, (text_x, text_y), font, font_scale, color, thickness)

    output_path = os.path.join(output_dir, image_name)
    if not cv2.imwrite(output_path, image):
        raise OSError(f"Failed to write image to {output_path}")

if __name__ == "__main__":
    # How many images to produce and where to put them.
    num_images = 100
    output_dir = "./number_images"
    create_dir(output_dir)

    # Each image shows one random number and is named after it, so a number
    # drawn twice overwrites the earlier file.
    for _ in tqdm(range(num_images), desc="Generating images"):
        number = random.randint(0, 9999999999)  # up to 10 digits
        generate_image(output_dir, f"{number}.png", str(number))