3/18/2025

Download YouTube Video Python code

 .

import yt_dlp
import os
from typing import Optional
import sys
import platform

def format_size(bytes):
    """Convert a byte count to a human-readable string (e.g. "1.50 KB").

    Args:
        bytes: Size in bytes (int or float). The parameter name shadows the
            builtin ``bytes``; it is kept for backward compatibility with
            existing keyword callers.

    Returns:
        str: the size with two decimals and a unit suffix, from B up to TB.
    """
    # Work on a local float copy instead of repeatedly reassigning the
    # builtin-shadowing parameter (fixes the idiom smell, same output).
    size = float(bytes)
    for unit in ('B', 'KB', 'MB', 'GB'):
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    # Anything that survived all four divisions is reported in terabytes.
    return f"{size:.2f} TB"

def get_browser_cookie_path():
    """Pick the default cookie-source browser for the current OS.

    Returns:
        str: "safari" on macOS, "chrome" on Windows, Linux, and anything else.
    """
    # platform.system() reports "Windows", "Darwin" (macOS), "Linux", ...
    return "safari" if platform.system() == "Darwin" else "chrome"

def download_video(url: str, output_path: Optional[str] = None, use_cookies: bool = True, browser: Optional[str] = None) -> str:
    """
    Download a YouTube video in the best quality using yt-dlp.

    Args:
        url (str): The URL of the YouTube video.
        output_path (str, optional): Directory to save the video. Defaults
            to the current working directory; created if missing.
        use_cookies (bool): Whether to use browser cookies for
            authentication (needed for private/age-restricted videos).
        browser (str, optional): Browser to extract cookies from (chrome,
            firefox, safari, etc.). Auto-detected from the OS when omitted.

    Returns:
        str: Expected path of the downloaded file, or "" on failure.
    """
    try:
        if not output_path:
            output_path = os.getcwd()
        os.makedirs(output_path, exist_ok=True)
        # Configure yt-dlp: prefer separate best MP4 video + M4A audio
        # streams (merged by ffmpeg), falling back to the best pre-merged
        # format if separate streams are unavailable.
        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
            'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
            'merge_output_format': 'mp4',
            'progress_hooks': [lambda d: print(f"\rDownloading: {d['_percent_str']} of {d['_total_bytes_str']}", end="") if d['status'] == 'downloading' else None],
            'postprocessor_hooks': [lambda d: print("\nMerging video and audio...") if d['status'] == 'started' else None],
            'quiet': False,
            'no_warnings': False,
            # Prefer higher resolutions first when yt-dlp sorts formats.
            'format_sort': ['res:2160', 'res:1440', 'res:1080', 'res:720'],
            'video_multistreams': True,
            'audio_multistreams': True,
            'prefer_free_formats': True,
            'postprocessors': [{
                'key': 'FFmpegVideoConvertor',
                # NOTE: yt-dlp spells this option key without the double "r".
                'preferedformat': 'mp4',
            }],
        }
        # Add cookie authentication if enabled.
        if use_cookies:
            if not browser:
                browser = get_browser_cookie_path()
            ydl_opts['cookiesfrombrowser'] = (browser,)
            print(f"Using cookies from {browser} for authentication...")
        print(f"Fetching video information...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Fetch metadata first so details can be shown before the
            # (potentially long) download starts.
            info = ydl.extract_info(url, download=False)
            video_title = info.get('title', 'video')
            duration = info.get('duration')
            formats = info.get('formats', [])
            # Pick the highest-resolution (then largest) video-bearing
            # format for display purposes only.
            best_video = max(
                (f for f in formats if f.get('vcodec') != 'none'),
                key=lambda f: (
                    # FIX: some formats carry height/filesize as None,
                    # which made the old `f.get('height', 0)` key raise
                    # "NoneType < int" inside max().
                    f.get('height') or 0,
                    f.get('filesize') or 0
                ),
                default=None
            )
            # Print video details.
            print(f"\nVideo details:")
            print(f"Title: {video_title}")
            # FIX: duration can be None (live streams / missing metadata);
            # the old unconditional `duration//60` raised TypeError there.
            if duration is not None:
                print(f"Duration: {duration//60}:{duration%60:02d}")
            if best_video:
                print(f"Best quality available: {best_video.get('height', 'N/A')}p")
                if best_video.get('filesize'):
                    print(f"Approximate size: {format_size(best_video['filesize'])}")
            print("\nStarting download in best quality...")
            # Download the video.
            ydl.download([url])
            # NOTE(review): yt-dlp may sanitize the title when writing the
            # file, so this is the expected path, not a guaranteed one.
            output_file = os.path.join(output_path, f"{video_title}.mp4")
            print(f"\nDownload completed successfully!")
            print(f"Saved to: {output_file}")
            return output_file
    except Exception as e:
        # Broad catch is deliberate: this is an interactive CLI tool, so we
        # show troubleshooting guidance instead of a traceback.
        print(f"\nError: {str(e)}")
        print("\nTroubleshooting steps:")
        print("1. Check if the video URL is correct")
        print("2. Check your internet connection")
        print("3. Make sure yt-dlp is up to date: pip install -U yt-dlp")
        print("4. Install or update ffmpeg (required for best quality):")
        print(" - On macOS: brew install ffmpeg")
        print(" - On Ubuntu/Debian: sudo apt-get install ffmpeg")
        print(" - On Windows: download from https://ffmpeg.org/download.html")
        print("5. For private videos, make sure:")
        print(" - You're logged into YouTube in your browser")
        print(" - You have access to the private video")
        print(" - The selected browser contains your YouTube login cookies")
        return ""

def main():
    """
    Entry point for the downloader.

    With --url the download runs once non-interactively; otherwise the
    user is prompted in a loop until they choose to quit.
    """
    print("YouTube Video Downloader (Best Quality)")
    print("-------------------------------------")
    print("This will download videos in the highest available quality")
    print("Note: Higher quality downloads may take longer and use more disk space")

    # Command-line interface (argparse imported lazily, only when needed).
    import argparse
    arg_parser = argparse.ArgumentParser(description='Download YouTube videos in best quality')
    arg_parser.add_argument('--url', '-u', help='YouTube video URL to download')
    arg_parser.add_argument('--output', '-o', help='Output directory')
    arg_parser.add_argument('--no-cookies', action='store_true', help='Disable browser cookie authentication')
    arg_parser.add_argument('--browser', '-b', choices=['chrome', 'firefox', 'safari', 'edge', 'opera'],
                            help='Browser to extract cookies from')
    cli = arg_parser.parse_args()

    if cli.url:
        # Non-interactive mode: one download, then exit.
        download_video(cli.url,
                       output_path=cli.output,
                       use_cookies=not cli.no_cookies,
                       browser=cli.browser)
        return

    # Interactive mode: prompt until the user quits.
    menu = {
        '1': 'chrome',
        '2': 'firefox',
        '3': 'safari',
        '4': 'edge',
        '5': 'opera'
    }
    while True:
        video_url = input("\nEnter the YouTube video URL (or 'q' to quit): ").strip()
        if video_url.lower() == 'q':
            print("Goodbye!")
            break
        if not video_url:
            print("Please enter a valid URL")
            continue

        use_cookies = True
        browser_choice = None
        if input("Do you need to access a private video? (y/n): ").strip().lower() == 'y':
            print("\nSelect your browser for authentication:")
            print("1. Chrome (default)")
            print("2. Firefox")
            print("3. Safari")
            print("4. Edge")
            print("5. Opera")
            print("6. None (no authentication)")
            picked = input("Enter your choice (1-6): ").strip()
            if picked == '6':
                use_cookies = False
            else:
                # Any unrecognized selection falls back to Chrome.
                browser_choice = menu.get(picked, 'chrome')

        # Empty answer means "current directory" (download_video's default).
        output_dir = input("Enter output directory (press Enter for current directory): ").strip() or None

        download_video(video_url, output_path=output_dir, use_cookies=use_cookies, browser=browser_choice)

        if input("\nWould you like to download another video? (y/n): ").strip().lower() != 'y':
            print("Goodbye! -marearts.com-")
            break

# Script entry point: launch the downloader CLI / interactive prompt.
if __name__ == "__main__":
    main()

..


Thank you.


What is LightGBM?

 LightGBM (Light Gradient Boosting Machine) is a gradient boosting framework developed by Microsoft that uses tree-based learning algorithms. It's designed to be efficient, fast, and capable of handling large-scale data with high dimensionality.

Here's a visualization of how LightGBM works:

Key features of LightGBM that make it powerful:

  1. Leaf-wise Tree Growth: Unlike traditional algorithms that grow trees level-wise, LightGBM grows trees leaf-wise, focusing on the leaf that will bring the maximum reduction in loss. This creates more complex trees but uses fewer splits, resulting in higher accuracy with the same number of leaves.

  2. Gradient-based One-Side Sampling (GOSS): This technique retains instances with large gradients (those that need more training) and randomly samples instances with small gradients. This allows LightGBM to focus computational resources on the more informative examples without losing accuracy.

  3. Exclusive Feature Bundling (EFB): For sparse datasets, many features are mutually exclusive (never take non-zero values simultaneously). LightGBM bundles these features together, treating them as a single feature. This reduces memory usage and speeds up training.

  4. Gradient Boosting Framework: Like other boosting algorithms, LightGBM builds trees sequentially, with each new tree correcting the errors of the existing ensemble.

LightGBM is particularly well-suited for your solver selection task because:

  • It handles categorical features natively
  • It works well with the moderate dataset size you have
  • It can create complex decision boundaries needed for multi-class classification
  • It's faster than traditional gradient boosting frameworks, allowing you to train with more boosting rounds

When properly tuned, LightGBM can often achieve better performance than neural networks for tabular data, especially with the right hyperparameters and sufficient boosting rounds.





3/07/2025

Find the largest directories in your home

.

# Find the largest directories in your home

du -h --max-depth=1 ~ | sort -rh | head -20


# Find the largest files

find ~ -type f -exec du -h {} \; | sort -rh | head -20


# Alternatively for a cleaner view of largest files

find ~ -type f -size +100M -exec ls -lh {} \; | sort -k 5 -rh | head -20

..


Thank you


Check my torch support GPU

checkgpu.py

..

import torch

# Report the installed PyTorch build.
print(f"PyTorch version: {torch.__version__}")

# torch.cuda.is_available() is the unified check in newer PyTorch builds
# (covers CUDA and, on ROCm builds, AMD GPUs as well).
print(f"Is GPU available: {torch.cuda.is_available()}")

# Enumerate and describe each visible GPU.
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs: {gpu_count}")
    for idx in range(gpu_count):
        props = torch.cuda.get_device_properties(idx)
        print(f"\nDevice {idx}: {props.name}")
        print(f" Total memory: {props.total_memory / 1024**3:.2f} GB")
        # Compute capability fields may be absent on some backends.
        if hasattr(props, 'major'):
            print(f" Compute capability: {props.major}.{props.minor}")

# Smoke-test an actual tensor operation on the first GPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    x = torch.ones(5, 5, device=device)
    y = x + 1
    print("\nGPU computation test:")
    print(y)
    print("GPU computation successful! study.marearts.com")
else:
    print("\nNo GPUs available for PyTorch.")

.

🙏

Thank you!

2/02/2025

Find and search which webcam is online on your computer.

 python code:


.

import cv2
import time

def check_cameras():
    """Probe camera indices 0-9 with OpenCV and report which ones work.

    For each index that opens and delivers a frame, print resolution, FPS,
    backend, frame shape, FOURCC format, and a 5-frame stability test.

    Returns:
        list[int]: indices of cameras that delivered at least one frame.
        (Previously the function returned None implicitly; returning the
        list is backward-compatible and makes the helper reusable.)
    """
    print("\nChecking camera indices 0-9...")
    print("----------------------------------------")
    working_cameras = []
    for index in range(10):
        cap = cv2.VideoCapture(index)
        if cap.isOpened():
            try:
                # Try to read a frame to confirm the device really works.
                ret, frame = cap.read()
                if ret:
                    # Query basic camera properties.
                    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    fps = cap.get(cv2.CAP_PROP_FPS)
                    backend = cap.getBackendName()
                    print(f"\n✓ Camera {index} is ONLINE:")
                    print(f" Resolution: {width}x{height}")
                    print(f" FPS: {fps}")
                    print(f" Backend: {backend}")
                    print(f" Frame shape: {frame.shape}")
                    # Decode the FOURCC code into its four ASCII characters.
                    # FIX: the comprehension variable no longer shadows the
                    # outer camera-index loop variable.
                    fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
                    fourcc_str = "".join(chr((fourcc >> 8 * byte_idx) & 0xFF) for byte_idx in range(4))
                    print(f" Format: {fourcc_str}")
                    working_cameras.append(index)
                    # Grab a few more frames to gauge stability.
                    frames_to_test = 5
                    success_count = 0
                    for _ in range(frames_to_test):
                        ret, frame = cap.read()
                        if ret:
                            success_count += 1
                        time.sleep(0.1)
                    print(f" Stability test: {success_count}/{frames_to_test} frames captured successfully")
                else:
                    print(f"✗ Camera {index}: Device found but cannot read frames")
            finally:
                # FIX: release the handle even if a probe call raises,
                # so a failure on one index can't leak the device.
                cap.release()
        else:
            # Release the failed handle too; harmless and tidy.
            cap.release()
            print(f"✗ Camera {index}: Not available")
    print("\n----------------------------------------")
    print("Summary:")
    if working_cameras:
        print(f"Working camera indices: {working_cameras}")
    else:
        print("No working cameras found")
    print("----------------------------------------")
    return working_cameras

def main():
    """Drive the webcam probe: banner, scan all indices, completion notice."""
    print("Starting camera detection...")
    check_cameras()
    print("\nCamera check complete!")

# Script entry point: run the camera scan when executed directly.
if __name__ == "__main__":
    main()

..



the output looks like:

Starting camera detection...


Checking camera indices 0-9...

----------------------------------------


✓ Camera 0 is ONLINE:

  Resolution: 640x480

  FPS: 30.0

  Backend: V4L2

  Frame shape: (480, 640, 3)

  Format: YUYV

  Stability test: 5/5 frames captured successfully

[ WARN:0@0.913] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video1): can't open camera by index

[ERROR:0@0.972] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 1: Not available


✓ Camera 2 is ONLINE:

  Resolution: 640x480

  FPS: 30.0

  Backend: V4L2

  Frame shape: (480, 640, 3)

  Format: YUYV

  Stability test: 5/5 frames captured successfully

[ WARN:0@1.818] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video3): can't open camera by index

[ERROR:0@1.820] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 3: Not available

[ WARN:0@1.820] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video4): can't open camera by index

[ERROR:0@1.822] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 4: Not available

[ WARN:0@1.822] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video5): can't open camera by index

[ERROR:0@1.823] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 5: Not available

[ WARN:0@1.824] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video6): can't open camera by index

[ERROR:0@1.825] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 6: Not available

[ WARN:0@1.825] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video7): can't open camera by index

[ERROR:0@1.828] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 7: Not available

[ WARN:0@1.828] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video8): can't open camera by index

[ERROR:0@1.830] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 8: Not available

[ WARN:0@1.830] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video9): can't open camera by index

[ERROR:0@1.831] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range

✗ Camera 9: Not available


----------------------------------------

Summary:

Working camera indices: [0, 2]

----------------------------------------


Camera check complete!



so you can know which one is online


Thank you!


1/19/2025

FP32 vs FP8 with tiny NN model.

I'll create a simple example of a tiny neural network to demonstrate fp8 vs fp32 memory usage. Let's make a small model with these layers:


1. Input: 784 features (like MNIST image 28x28)

2. Hidden layer 1: 512 neurons

3. Hidden layer 2: 256 neurons

4. Output: 10 neurons (for 10 digit classes)


Let's calculate the memory needed for weights:


1. First Layer Weights:

```

784 × 512 = 401,408 weights

+ 512 biases

= 401,920 parameters

```


2. Second Layer Weights:

```

512 × 256 = 131,072 weights

+ 256 biases

= 131,328 parameters

```


3. Output Layer Weights:

```

256 × 10 = 2,560 weights

+ 10 biases

= 2,570 parameters

```


Total Parameters: 535,818


Memory Usage:

```

FP32: 535,818 × 4 bytes = 2,143,272 bytes ≈ 2.14 MB

FP8:  535,818 × 1 byte  =   535,818 bytes ≈ 0.54 MB

```


Let's demonstrate this with some actual matrix multiplication:


```python

# Example of one batch of inference

Input size: 32 images (batch) × 784 features

32 × 784 = 25,088 numbers


For first layer multiplication:

(32 × 784) × (784 × 512) → (32 × 512)

```


During computation:

1. With fp32:

```

Weights in memory: 401,920 × 4 = 1,607,680 bytes

Input in memory: 25,088 × 4 = 100,352 bytes

Output in memory: 16,384 × 4 = 65,536 bytes

Total: ≈ 1.77 MB

```


2. With fp8:

```

Weights in memory: 401,920 × 1 = 401,920 bytes

Input in memory: 25,088 × 1 = 25,088 bytes

Output in memory: 16,384 × 1 = 16,384 bytes

Total: ≈ 0.44 MB

```


During actual computation:

```

1. Load a tile/block of the weight matrix (let's say 128×128)

   fp8: 128×128 = 16,384 bytes

2. Convert this block to fp32: 16,384 × 4 = 65,536 bytes

3. Perform multiplication in fp32

4. Convert result back to fp8

5. Move to next block

```


This shows how even though we compute in fp32, keeping the model in fp8:

1. Uses 1/4 the memory for storage

2. Only needs small blocks in fp32 temporarily

3. Can process larger batches or models with same memory