From ec7625d4d98da65d4f58e48762a8a5c9bb195715 Mon Sep 17 00:00:00 2001 From: Kris Forbes Date: Tue, 3 Feb 2026 14:36:25 -0500 Subject: [PATCH] Initial commit: Intelligent Disk Cleaner --- .gitignore | 4 + README.md | 71 +++++ delete_av1.rb | 74 +++++ disk_cleaner.py | 468 +++++++++++++++++++++++++++++ nvenc.py | 598 ++++++++++++++++++++++++++++++++++++++ test_driver.py | 92 ++++++ test_driver_rate_limit.py | 62 ++++ test_driver_warning.py | 94 ++++++ test_logger.py | 17 ++ time_tracker | 1 + video_optimizer.rb | 288 ++++++++++++++++++ 11 files changed, 1769 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 delete_av1.rb create mode 100644 disk_cleaner.py create mode 100644 nvenc.py create mode 100644 test_driver.py create mode 100644 test_driver_rate_limit.py create mode 100644 test_driver_warning.py create mode 100644 test_logger.py create mode 160000 time_tracker create mode 100644 video_optimizer.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4a84371 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.pyc +*.json +disk_cleaner_state.json diff --git a/README.md b/README.md new file mode 100644 index 0000000..2fdea51 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +# Intelligent Disk Cleaner + +A zero-dependency Python script for Linux servers that prevents disk exhaustion by monitoring usage and intelligently cleaning up log files. + +## Key Features + +1. **Zero External Dependencies**: Runs on standard Python 3 libraries. No `pip install` needed. +2. **Dual Thresholds**: + * **Warning (80%)**: Alerts admins to high usage and identifies the largest open file (suspected culprit) without modifying any files. + * **Critical (95%)**: Triggers active cleanup to recover space immediately. +3. **Intelligent Truncation**: Safely shrinks large active log files in-place by removing the oldest 50% of data, preserving the process file handle. +4. **Fallback Cleanup**: If active logs cannot be shrunk, iteratively deletes the oldest rotated log files first. +5. **Spam Prevention**: Rate-limits email notifications to once every 8 hours per disk volume. + +## Requirements + +* **Operating System**: Linux (relies on `/proc` filesystem for process analysis). +* **Runtime**: Python 3.6+. + +## Configuration + +Open `disk_cleaner.py` and configure the settings at the top of the file: + +```python +# --- Configuration --- +THRESHOLD_PERCENT = 95.0 # Critical cleanup trigger +WARNING_THRESHOLD_PERCENT = 80.0 # Warning email trigger +RATE_LIMIT_SECONDS = 8 * 3600 # 8 hours cooldown + +# Email Settings +SMTP_SERVER = "smtp.example.com" +SMTP_PORT = 25 +EMAIL_FROM = "alerts@example.com" +EMAIL_TO = ["admins@example.com"] +``` + +## Usage + +### Manual Execution +Run with root privileges to ensure access to all system processes (required for accurate open file detection): + +```bash +sudo python3 disk_cleaner.py +``` + +### Automation (Cron) +To monitor continuously, add a cron job (e.g., run hourly). + +1. Open root crontab: + ```bash + sudo crontab -e + ``` + +2. Add the schedule: + ```bash + # Run every hour at minute 0 + 0 * * * * /usr/bin/python3 /opt/scripts/disk_cleaner.py >> /var/log/disk_cleaner.log 2>&1 + ``` + +## Logic Flow + +1. **Check Usage**: Scans all mounted partitions (ignoring pseudo-filesystems). +2. **Evaluate State**: + * **< 80%**: Healthy. Exit. + * **80% - 94% (Warning)**: Start scan. Find the largest file held open by any process. Send a "WARNING" email with the file path and size. **No action taken.** + * **> 95% (Critical)**: Start scan. + * **Strategy A**: additional checks confirm files are logs. Truncate the largest active log file by 50%. Check if space is recovered. + * **Strategy B**: If A fails/insufficient, find rotated logs (e.g., `.log.1`, `.gz`) and delete the oldest ones until usage drops. + * **Action Taken**: Send "URGENT" email detailing the cleanup. + * **Action Failed**: If space cannot be freed, send "CRITICAL" email with the largest suspect file. +3. **Rate Limit**: Before sending any email, check the state file (`/tmp/disk_cleaner_state.json`). If an email was sent for this volume in the last 8 hours, suppress the notification. diff --git a/delete_av1.rb b/delete_av1.rb new file mode 100644 index 0000000..b5c9007 --- /dev/null +++ b/delete_av1.rb @@ -0,0 +1,74 @@ +require 'optparse' +require 'open3' +require 'json' + +options = {} +OptionParser.new do |opts| + opts.banner = "Usage: ruby delete_av1.rb [directory]" +end.parse! + +directory = ARGV[0] +if directory.nil? || !Dir.exist?(directory) + puts "Usage: ruby delete_av1.rb [directory]" + exit 1 +end + +def is_av1?(file_path) + # Check only video stream + cmd = ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_streams', '-select_streams', 'v:0', file_path] + stdout, _, status = Open3.capture3(*cmd) + return false unless status.success? + + begin + data = JSON.parse(stdout) + stream = data['streams'][0] + return stream && stream['codec_name'] == 'av1' + rescue + false + end +end + +puts "Scanning #{directory} for AV1 files..." +puts "This might take a while..." + +av1_files = [] + +Dir.glob("#{directory}/**/*").each do |file| + next if File.directory?(file) + next unless ['.mp4', '.mkv', '.avi', '.mov', '.m4v'].include?(File.extname(file).downcase) + + if is_av1?(file) + puts "Found: #{file}" + av1_files << file + end +end + +if av1_files.empty? + puts "\nNo AV1 files found." + exit +end + +puts "\n" + "="*40 +puts "Found #{av1_files.length} AV1 files:" +av1_files.each { |f| puts " - #{f}" } +puts "="*40 +puts "\nFound #{av1_files.length} files encoded with AV1." +print "Do you want to DELETE these files? [y/N]: " +STDOUT.flush +confirm = STDIN.gets.chomp.strip.downcase + +if confirm == 'y' + deleted_count = 0 + av1_files.each do |file| + begin + File.delete(file) + puts "Deleted: #{file}" + deleted_count += 1 + rescue => e + puts "Failed to delete #{file}: #{e.message}" + end + end + puts "\nDeletion complete. #{deleted_count} files removed." +else + puts "\nOperation cancelled. No files were deleted." +end diff --git a/disk_cleaner.py b/disk_cleaner.py new file mode 100644 index 0000000..034aaf9 --- /dev/null +++ b/disk_cleaner.py @@ -0,0 +1,468 @@ +import os +import sys +import shutil +import smtplib +import re +import socket +import glob +from email.message import EmailMessage + +import json +import time + +# --- Configuration --- +THRESHOLD_PERCENT = 95.0 +WARNING_THRESHOLD_PERCENT = 80.0 +# Regex to match somewhat standard log timestamps (e.g. YYYY-MM-DD, MMM DD, ISO8601) +LOG_TIMESTAMP_REGEX = r'(\d{4}-\d{2}-\d{2}|\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})' +# Email settings - placeholders +SMTP_SERVER = "127.0.0.1" +SMTP_PORT = 25 +EMAIL_FROM = "diskcleaner@example.com" +EMAIL_TO = ["admins@example.com"] +STATE_FILE = "/tmp/disk_cleaner_state.json" +RATE_LIMIT_SECONDS = 8 * 3600 + +def load_state(): + try: + if os.path.exists(STATE_FILE): + with open(STATE_FILE, 'r') as f: + return json.load(f) + except Exception as e: + print(f"Warning: Could not load state file: {e}") + return {} + +def save_state(state): + try: + with open(STATE_FILE, 'w') as f: + json.dump(state, f) + except Exception as e: + print(f"Warning: Could not save state file: {e}") + +def should_send_email(mountpoint, state): + """ + Returns True if we should send an email for this mountpoint. + Checks against the 8-hour cooldown. + """ + last_sent = state.get(mountpoint, 0) + if time.time() - last_sent < RATE_LIMIT_SECONDS: + return False + return True + +def record_email_sent(mountpoint, state): + state[mountpoint] = time.time() + save_state(state) + +def send_email(subject, body, mountpoint=None): + # If mountpoint is provided, check rate limit + if mountpoint: + state = load_state() + if not should_send_email(mountpoint, state): + print(f"Rate limit active for {mountpoint}. Suppressing email: {subject}") + return + + msg = EmailMessage() + msg.set_content(body) + msg['Subject'] = subject + msg['From'] = EMAIL_FROM + msg['To'] = ", ".join(EMAIL_TO) + + try: + # In a real scenario, might need login/auth + with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as s: + s.send_message(msg) + print(f"Email sent: {subject}") + + if mountpoint: + record_email_sent(mountpoint, state) + + except Exception as e: + print(f"Failed to send email: {e}") + +def get_partitions(): + """ + Parses /proc/mounts to get list of mounted filesystems. + Returns list of (device, mountpoint, fstype). + """ + partitions = [] + if not os.path.exists('/proc/mounts'): + # Fallback for non-Linux or testing environments without /proc mocks + return [] + + try: + with open('/proc/mounts', 'r') as f: + for line in f: + parts = line.strip().split() + if len(parts) >= 3: + device, mountpoint, fstype = parts[0], parts[1], parts[2] + # Filter out pseudo-filesystems + if fstype not in ('proc', 'sysfs', 'devtmpfs', 'devpts', 'tmpfs', 'cgroup', 'squashfs'): + # rudimentary check: usually we want physical devices or LVM + if device.startswith('/dev/'): + partitions.append((device, mountpoint)) + except Exception as e: + print(f"Error reading /proc/mounts: {e}") + return partitions + +def get_process_name_pid(pid): + """ + Reads /proc/[pid]/comm or cmdline to get process name. + """ + try: + with open(f'/proc/{pid}/comm', 'r') as f: + return f.read().strip() + except: + return "unknown" + +def get_open_files_flat(): + """ + Walks /proc to find all open files. + Returns a list of dicts: {'path': str, 'pid': int, 'size': int} + """ + open_files = [] + # Iterate over all PIDs in /proc + if not os.path.exists('/proc'): + return [] + + for pid_dir in os.listdir('/proc'): + if not pid_dir.isdigit(): + continue + + pid = int(pid_dir) + fd_dir = f'/proc/{pid}/fd' + + try: + # os.listdir might fail if process vanishes + fds = os.listdir(fd_dir) + except (FileNotFoundError, PermissionError): + continue + + for fd in fds: + try: + # Resolve the symlink to get the real file path + link_path = os.path.join(fd_dir, fd) + real_path = os.readlink(link_path) + + # Check if it's a regular file (not a pipe/socket) + if real_path.startswith('/') and os.path.isfile(real_path): + # Get size + size = os.path.getsize(real_path) + open_files.append({ + 'path': real_path, + 'pid': pid, + 'size': size + }) + except (OSError, FileNotFoundError): + continue + + return open_files + +def is_log_file(file_path): + """ + Intelligent check: + 1. 'log' in path (case insensitive) + 2. Read first chunk, look for timestamp-like patterns. + """ + if "log" not in file_path.lower(): + return False, "Filename does not contain 'log'" + + try: + with open(file_path, 'r', errors='ignore') as f: + chunk = f.read(4096) + if re.search(LOG_TIMESTAMP_REGEX, chunk): + return True, "Found timestamps" + else: + return False, "No timestamps found in header" + except Exception as e: + return False, f"Read error: {e}" + +def shrink_file_inplace(file_path): + """ + Removes the first 50% of the file data in-place. + """ + try: + file_size = os.path.getsize(file_path) + if file_size == 0: + return False, "File is empty" + + midpoint = file_size // 2 + chunk_size = 1024 * 1024 * 10 # 10MB chunks + + print(f"Shrinking {file_path} ({file_size} bytes). Removing first {midpoint} bytes.") + + with open(file_path, "r+b") as f: + read_pos = midpoint + write_pos = 0 + + while read_pos < file_size: + f.seek(read_pos) + data = f.read(chunk_size) + bytes_read = len(data) + + if bytes_read == 0: + break + + f.seek(write_pos) + f.write(data) + + read_pos += bytes_read + write_pos += bytes_read + + f.truncate(write_pos) + + print(f"Successfully shrunk {file_path} to {write_pos} bytes.") + return True, f"Removed first {midpoint} bytes. New size: {write_pos}" + except Exception as e: + return False, f"Error shrinking file: {e}" + + +def is_rotated_log(filename): + """ + Checks if a filename looks like a rotated log. + Common patterns: + - .gz, .zip, .tar, .bz2 + - .old, .bak + - .log.1, .log.2, ... + - .log-20240101, ... + """ + # Simple extensions + if filename.lower().endswith(('.gz', '.zip', '.tar', '.bz2', '.old', '.bak')): + return True + + # Numeric suffixes (.1, .2, etc) + if re.search(r'\.log\.\d+$', filename, re.IGNORECASE): + return True + + # Date suffixes (log-YYYYMMDD, etc) + # This is a bit loose, be careful not to match everything. + # Look for 8 digits at end or near end? + if re.search(r'[-_.]\d{8}([-_.]|$)', filename): + return True + + return False + +def find_rotated_logs(mountpoint): + """ + Walks the mountpoint to find rotated logs. + Returns list of (path, size, mtime). + """ + candidates = [] + print(f"Scanning {mountpoint} for rotated logs...") + + try: + mount_dev = os.stat(mountpoint).st_dev + except OSError: + return [] + + for root, dirs, files in os.walk(mountpoint): + # Don't cross filesystems + try: + if os.stat(root).st_dev != mount_dev: + # Remove subdirs from traversal to prevent descending + dirs[:] = [] + continue + except OSError: + continue + + for file in files: + if is_rotated_log(file): + full_path = os.path.join(root, file) + try: + stats = os.stat(full_path) + candidates.append((full_path, stats.st_size, stats.st_mtime)) + except OSError: + pass + + # Sort old -> new + candidates.sort(key=lambda x: x[2]) + return candidates + +def check_disk_usage_percent(mountpoint): + try: + usage = shutil.disk_usage(mountpoint) + return (usage.used / usage.total) * 100 + except OSError: + return 100.0 + +def cleanup_rotated_logs(mountpoint, hostname): + """ + Deletes oldest rotated logs until usage < 80%. + """ + candidates = find_rotated_logs(mountpoint) + deleted_count = 0 + deleted_bytes = 0 + deleted_files = [] + + current_usage = check_disk_usage_percent(mountpoint) + + for path, size, mtime in candidates: + if current_usage <= THRESHOLD_PERCENT: + break + + print(f"Deleting old rotated log: {path} ({size} bytes, mtime: {mtime})") + try: + os.remove(path) + deleted_count += 1 + deleted_bytes += size + + # Re-check usage + current_usage = check_disk_usage_percent(mountpoint) + deleted_files.append(os.path.basename(path)) + + except OSError as e: + print(f"Failed to delete {path}: {e}") + + if deleted_count > 0: + subject = f"URGENT: Rotated Log Cleanup - {hostname} - {mountpoint}" + # Truncate list if too long + file_list_str = ", ".join(deleted_files[:10]) + if len(deleted_files) > 10: + file_list_str += f" and {len(deleted_files)-10} others" + + body = (f"Volume {mountpoint} was full.\n" + f"Action: Deleted {deleted_count} old rotated log files.\n" + f"Total freed: {deleted_bytes / 1024 / 1024:.2f} MB.\n" + f"Files: {file_list_str}\n" + f"Current Usage: {current_usage:.1f}%") + send_email(subject, body, mountpoint) + return True + + return False + +def check_and_clean(): + if os.name == 'nt': + print("Note: This script is designed for Linux (/proc). Windows execution will miss process data.") + + hostname = socket.gethostname() + + # 1. Get Partitions + partitions = get_partitions() + if not partitions: + print("No partitions found via /proc/mounts. (Are you on Windows?)") + partitions = [('/dev/root', '/')] + + # 2. Identify Metadata + critical_partitions = [] + warning_partitions = [] + + for device, mountpoint in partitions: + try: + percent = check_disk_usage_percent(mountpoint) + except OSError: + continue + + if percent > THRESHOLD_PERCENT: + print(f"CRITICAL: Volume {mountpoint} ({device}) is at {percent:.1f}% usage.") + critical_partitions.append(mountpoint) + elif percent > WARNING_THRESHOLD_PERCENT: + print(f"WARNING: Volume {mountpoint} ({device}) is at {percent:.1f}% usage.") + warning_partitions.append(mountpoint) + + if not critical_partitions and not warning_partitions: + print("All volumes are healthy.") + return + + # 3. Found partitions. Now scan processes. + print("High usage detected. Scanning /proc for open files...") + all_open_files = get_open_files_flat() + + # --- PROCESS CRITICAL --- + for mountpoint in critical_partitions: + current_percent = check_disk_usage_percent(mountpoint) + if current_percent <= THRESHOLD_PERCENT: + continue + + # Strategy A: Shrink Open Files + candidates = [] + for file_info in all_open_files: + path = file_info['path'] + if mountpoint == '/': + try: + if os.stat(path).st_dev == os.stat(mountpoint).st_dev: + candidates.append(file_info) + except OSError: + pass + else: + if path.startswith(mountpoint): + candidates.append(file_info) + + candidates.sort(key=lambda x: x['size'], reverse=True) + + shrunk_something = False + for candidate in candidates: + path = candidate['path'] + size = candidate['size'] + + is_log, reason = is_log_file(path) + if is_log: + pid = candidate['pid'] + proc_name = get_process_name_pid(pid) + print(f"Found candidate: {path} ({size} bytes), held by {proc_name} (PID {pid})") + + success, msg = shrink_file_inplace(path) + if success: + subject = f"URGENT: Disk Cleanup Action - {hostname} - {mountpoint}" + body = (f"Volume {mountpoint} was >{THRESHOLD_PERCENT}%.\n" + f"Identified large log file: {path}\n" + f"Process holding file: {proc_name} (PID {pid})\n" + f"Action: {msg}\n") + send_email(subject, body, mountpoint) + shrunk_something = True + break # Re-evaluate usage + + # Check if Strategy A was enough + if check_disk_usage_percent(mountpoint) <= THRESHOLD_PERCENT: + print(f"Volume {mountpoint} is now safe.") + continue + + # Strategy B: Rotated Logs Fallback + print(f"Active log shrinking insufficient or unavailable. Checking for rotated logs on {mountpoint}...") + cleanup_success = cleanup_rotated_logs(mountpoint, hostname) + + if not cleanup_success and not shrunk_something: + print(f"No suitable log file found to clean on {mountpoint}.") + subject = f"CRITICAL: Disk Full - {hostname} - {mountpoint}" + + suspected_culprit = "Unknown" + if candidates: + top_cand = candidates[0] + suspected_culprit = f"{top_cand['path']} ({top_cand['size'] / 1024 / 1024:.2f} MB)" + + body = (f"Volume {mountpoint} is >{THRESHOLD_PERCENT}%.\n" + f"Could not find any suitable open log files or rotated logs to clean automatically.\n" + f"Suspected largest open file: {suspected_culprit}\n" + f"Usage is still {check_disk_usage_percent(mountpoint):.1f}%.") + send_email(subject, body, mountpoint) + + # --- PROCESS WARNINGS --- + for mountpoint in warning_partitions: + # Find culprits but DO NOT TOUCH + candidates = [] + for file_info in all_open_files: + path = file_info['path'] + if mountpoint == '/': + try: + if os.stat(path).st_dev == os.stat(mountpoint).st_dev: + candidates.append(file_info) + except OSError: + pass + else: + if path.startswith(mountpoint): + candidates.append(file_info) + + candidates.sort(key=lambda x: x['size'], reverse=True) + + suspected_culprit = "Unknown" + if candidates: + top_cand = candidates[0] + suspected_culprit = f"{top_cand['path']} ({top_cand['size'] / 1024 / 1024:.2f} MB)" + + subject = f"WARNING: Disk Usage High - {hostname} - {mountpoint}" + body = (f"Volume {mountpoint} is >{WARNING_THRESHOLD_PERCENT}% (Current: {check_disk_usage_percent(mountpoint):.1f}%).\n" + f"Threshold for automatic cleanup is {THRESHOLD_PERCENT}%.\n" + f"Suspected largest open file: {suspected_culprit}\n" + f"Please investigate.") + send_email(subject, body, mountpoint) + +if __name__ == "__main__": + check_and_clean() diff --git a/nvenc.py b/nvenc.py new file mode 100644 index 0000000..07eb37e --- /dev/null +++ b/nvenc.py @@ -0,0 +1,598 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + plugins.nvenc.py + + Written by: Josh.5 + Date: 27 Dec 2023, (11:21 AM) + + Copyright: + Copyright (C) 2021 Josh Sunnex + + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General + Public License as published by the Free Software Foundation, version 3. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along with this program. + If not, see . + +""" +""" +Notes: + - Listing available encoder options: + ffmpeg -h encoder=h264_nvenc + ffmpeg -h encoder=hevc_nvenc +""" +import logging +import re +import subprocess + +from video_transcoder.lib.encoders.base import Encoder + +logger = logging.getLogger("Unmanic.Plugin.video_transcoder") + + +def list_available_cuda_devices(): + """ + Return a list of available CUDA devices via nvidia-smi. + """ + gpu_dicts = [] + try: + # Run the nvidia-smi command + result = subprocess.check_output(['nvidia-smi', '-L'], encoding='utf-8') + # Use regular expression to find device IDs, names, and UUIDs + gpu_info = re.findall(r'GPU (\d+): (.+) \(UUID: (.+)\)', result) + # Populate the list of dictionaries for each GPU + for gpu_id, gpu_name, gpu_uuid in gpu_info: + gpu_dicts.append({ + 'hwaccel_device': gpu_id, + 'hwaccel_device_name': f"{gpu_name} (UUID: {gpu_uuid})", + }) + except FileNotFoundError: + # nvidia-smi executable not found + return [] + except subprocess.CalledProcessError: + # nvidia-smi command failed, likely no NVIDIA GPU present + return [] + # Return the list of GPUs + return gpu_dicts + + +def get_configured_device(settings): + """ + Returns the currently configured device + Checks to ensure that the configured device exists and otherwise will return the first device available + :param settings: + :return: + """ + hardware_device = None + # Set the hardware device + hardware_devices = list_available_cuda_devices() + if not hardware_devices: + # Return no options. No hardware device was found + raise Exception("No NVIDIA device found") + # If we have configured a hardware device + if settings.get_setting('nvenc_device') not in ['none']: + # Attempt to match to that configured hardware device + for hw_device in hardware_devices: + if settings.get_setting('nvenc_device') == hw_device.get('hwaccel_device'): + hardware_device = hw_device + break + # If no matching hardware device is set, then select the first one + if not hardware_device: + hardware_device = hardware_devices[0] + return hardware_device + + +class NvencEncoder(Encoder): + def __init__(self, settings=None, probe=None): + super().__init__(settings=settings, probe=probe) + + def _map_pix_fmt(self, is_h264: bool, is_10bit: bool) -> str: + if is_10bit and not is_h264: + return "p010le" + else: + return "nv12" + + def provides(self): + return { + "h264_nvenc": { + "codec": "h264", + "label": "NVENC - h264_nvenc", + }, + "hevc_nvenc": { + "codec": "hevc", + "label": "NVENC - hevc_nvenc", + }, + "av1_nvenc": { + "codec": "av1", + "label": "NVENC - av1_nvenc", + }, + } + + def options(self): + return { + "nvenc_device": "none", + "nvenc_decoding_method": "cpu", + "nvenc_preset": "p4", + "nvenc_tune": "auto", + "nvenc_profile": "main", + "nvenc_encoder_ratecontrol_method": "auto", + "nvenc_encoder_ratecontrol_lookahead": 0, + "nvenc_enable_spatial_aq": False, + "nvenc_enable_temporal_aq": False, + "nvenc_aq_strength": 8, + } + + def generate_default_args(self): + """ + Generate a list of args for using a NVENC decoder + + REF: https://trac.ffmpeg.org/wiki/HWAccelIntro#NVDECCUVID + + :return: + """ + hardware_device = get_configured_device(self.settings) + + generic_kwargs = {} + advanced_kwargs = {} + # Check if we are using a HW accelerated decoder also + if self.settings.get_setting('nvenc_decoding_method') in ['cuda', 'nvdec', 'cuvid']: + generic_kwargs = { + "-hwaccel_device": hardware_device.get('hwaccel_device'), + "-hwaccel": self.settings.get_setting('nvenc_decoding_method'), + "-init_hw_device": "cuda=hw", + "-filter_hw_device": "hw", + } + if self.settings.get_setting('nvenc_decoding_method') in ['cuda', 'nvdec']: + generic_kwargs["-hwaccel_output_format"] = "cuda" + + return generic_kwargs, advanced_kwargs + + def generate_filtergraphs(self, current_filter_args, smart_filters, encoder_name): + """ + Generate the required filter for enabling NVENC/CUDA HW acceleration. + + :return: + """ + generic_kwargs = {} + advanced_kwargs = {} + start_filter_args = [] + end_filter_args = [] + + # Loop over any HW smart filters to be applied and add them as required. + hw_smart_filters = [] + remaining_smart_filters = [] + for sf in smart_filters: + if sf.get("scale"): + w = sf["scale"]["values"]["width"] + hw_smart_filters.append(f"scale_cuda={w}:-1") + else: + remaining_smart_filters.append(sf) + + # Check for HW accelerated decode mode + # All decode methods ('cuda', 'nvdec', 'cuvid') are handled by the same + # filtergraph logic and output CUDA frames. The main FFmpeg command handles the specific decoder. + hw_decode = (self.settings.get_setting('nvenc_decoding_method') or '').lower() in ('cuda', 'nvdec', 'cuvid') + + # Check software format to use + target_fmt = self._target_pix_fmt_for_encoder(encoder_name) + + # Handle HDR + enc_supports_hdr = (encoder_name in ["hevc_nvenc", "av1_nvenc"]) + target_color_config = self._target_color_config_for_encoder(encoder_name) + + # If we have SW filters: + if remaining_smart_filters or current_filter_args: + # If we have SW filters and HW decode (CUDA/NVDEC) is enabled, make decoder produce SW frames + if hw_decode: + generic_kwargs['-hwaccel_output_format'] = target_fmt + + # Add filter to upload software frames to CUDA for CUDA filters + # Note, format conversion (if any - eg yuv422p10le -> p010le) happens after the software filters. + # If a user applies a custom software filter that does not support the pix_fmt, then will need to prefix it with 'format=p010le' + chain = [f"format={target_fmt}"] + if enc_supports_hdr and target_color_config.get('apply_color_params'): + # Apply setparams filter if software filters exist (apply at the start of the filters list) to preserve HDR tags + chain.append(target_color_config['setparams_filter']) + chain += ["hwupload_cuda"] + end_filter_args.append(",".join(chain)) + # If we have no software filters: + elif not hw_decode: + # CPU decode -> setparams (if HDR) -> upload to CUDA + chain = [f"format={target_fmt}"] + if enc_supports_hdr and target_color_config.get('apply_color_params'): + chain.append(target_color_config['setparams_filter']) + chain.append("hwupload_cuda") + start_filter_args.append(",".join(chain)) + + # Add the smart filters to the end + end_filter_args += hw_smart_filters + + # Return built args + return { + "generic_kwargs": generic_kwargs, + "advanced_kwargs": advanced_kwargs, + "smart_filters": remaining_smart_filters, + "start_filter_args": start_filter_args, + "end_filter_args": end_filter_args, + } + + def encoder_details(self, encoder): + hardware_devices = list_available_cuda_devices() + if not hardware_devices: + # Return no options. No hardware device was found + return {} + provides = self.provides() + return provides.get(encoder, {}) + + def stream_args(self, stream_info, stream_id, encoder_name): + generic_kwargs = {} + advanced_kwargs = {} + encoder_args = [] + stream_args = [] + + # Specify the GPU to use for encoding + hardware_device = get_configured_device(self.settings) + stream_args += ['-gpu', str(hardware_device.get('hwaccel_device', '0'))] + + # Handle HDR + enc_supports_hdr = (encoder_name in ["hevc_nvenc", "av1_nvenc"]) + if enc_supports_hdr: + target_color_config = self._target_color_config_for_encoder(encoder_name) + else: + target_color_config = { + "apply_color_params": False + } + if enc_supports_hdr and target_color_config.get('apply_color_params'): + # Force Main10 profile + stream_args += [f'-profile:v:{stream_id}', 'main10'] + + # Use defaults for basic mode + if self.settings.get_setting('mode') in ['basic']: + # Read defaults + defaults = self.options() + + if enc_supports_hdr and target_color_config.get('apply_color_params'): + # Add HDR color tags to the encoder output stream + for k, v in target_color_config.get('stream_color_params', {}).items(): + stream_args += [k, v] + + stream_args += ['-preset', str(defaults.get('nvenc_preset'))] + + return { + "generic_kwargs": generic_kwargs, + "advanced_kwargs": advanced_kwargs, + "encoder_args": encoder_args, + "stream_args": stream_args, + } + + # Add the preset and tune + if self.settings.get_setting('nvenc_preset'): + stream_args += ['-preset', str(self.settings.get_setting('nvenc_preset'))] + if self.settings.get_setting('nvenc_tune') and self.settings.get_setting('nvenc_tune') != 'auto': + stream_args += ['-tune', str(self.settings.get_setting('nvenc_tune'))] + if self.settings.get_setting('nvenc_profile') and self.settings.get_setting('nvenc_profile') != 'auto': + stream_args += [f'-profile:v:{stream_id}', str(self.settings.get_setting('nvenc_profile'))] + + # Apply rate control config + if self.settings.get_setting('nvenc_encoder_ratecontrol_method') in ['constqp', 'vbr', 'cbr']: + # Set the rate control method + stream_args += [f'-rc:v:{stream_id}', str(self.settings.get_setting('nvenc_encoder_ratecontrol_method'))] + rc_la = int(self.settings.get_setting('nvenc_encoder_ratecontrol_lookahead') or 0) + if rc_la > 0: + stream_args += [f'-rc-lookahead:v:{stream_id}', str(rc_la)] + + # Apply adaptive quantization + if self.settings.get_setting('nvenc_enable_spatial_aq'): + stream_args += ['-spatial-aq', '1'] + if self.settings.get_setting('nvenc_enable_spatial_aq') or self.settings.get_setting('nvenc_enable_temporal_aq'): + stream_args += [f'-aq-strength:v:{stream_id}', str(self.settings.get_setting('nvenc_aq_strength'))] + if self.settings.get_setting('nvenc_enable_temporal_aq'): + stream_args += ['-temporal-aq', '1'] + + # If CUVID is enabled, return generic_kwargs + if (self.settings.get_setting('nvenc_decoding_method') or '').lower() in ['cuvid']: + in_codec = stream_info.get('codec_name', 'unknown_codec_name') + generic_kwargs = {f'-c:v:{stream_id}': f'{in_codec}_cuvid'} + + # Add stream color args + if enc_supports_hdr and target_color_config.get('apply_color_params'): + # Add HDR color tags to the encoder output stream + for k, v in target_color_config.get('stream_color_params', {}).items(): + stream_args += [k, v] + + # Return built args + return { + "generic_kwargs": generic_kwargs, + "advanced_kwargs": advanced_kwargs, + "encoder_args": encoder_args, + "stream_args": stream_args, + } + + def __set_default_option(self, select_options, key, default_option=None): + """ + Sets the default option if the currently set option is not available + + :param select_options: + :param key: + :return: + """ + available_options = [] + for option in select_options: + available_options.append(option.get('value')) + if not default_option: + default_option = option.get('value') + current_value = self.settings.get_setting(key) + if not getattr(self.settings, 'apply_default_fallbacks', True): + return current_value + if current_value not in available_options: + # Update in-memory setting for display only. + # IMPORTANT: do not persist settings from plugin. + # Only the Unmanic API calls should persist to JSON file. + self.settings.settings_configured[key] = default_option + return default_option + return current_value + + def get_nvenc_device_form_settings(self): + values = { + "label": "NVIDIA Device", + "sub_setting": True, + "input_type": "select", + "select_options": [ + { + "value": "none", + "label": "No NVIDIA devices available", + } + ] + } + default_option = None + hardware_devices = list_available_cuda_devices() + if hardware_devices: + values['select_options'] = [] + for hw_device in hardware_devices: + if not default_option: + default_option = hw_device.get('hwaccel_device', 'none') + values['select_options'].append({ + "value": hw_device.get('hwaccel_device', 'none'), + "label": "NVIDIA device '{}'".format(hw_device.get('hwaccel_device_name', 'not found')), + }) + if not default_option: + default_option = 'none' + + self.__set_default_option(values['select_options'], 'nvenc_device', default_option=default_option) + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + return values + + def get_nvenc_decoding_method_form_settings(self): + values = { + "label": "Enable HW Decoding", + "description": "Warning: Ensure your device supports decoding the source video codec or it will fail.\n" + "This enables full hardware transcode with NVDEC and NVENC, using only GPU memory for the entire video transcode.\n" + "If filters are configured in the plugin, decoder will output NV12 software surfaces which are slightly slower.\n" + "Note: It is recommended that you disable this option for 10-bit encodes.", + "sub_setting": True, + "input_type": "select", + "select_options": [ + { + "value": "cpu", + "label": "Disabled - Use CPU to decode of video source (provides best compatibility)", + }, + { + "value": "cuda", + "label": "NVDEC/CUDA - Use the GPUs HW decoding the video source and upload surfaces to CUDA (recommended)", + }, + { + "value": "cuvid", + "label": "CUVID - Older interface for HW video decoding. Kepler or older hardware may perform better with this option", + } + ] + } + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + return values + + def get_nvenc_preset_form_settings(self): + values = { + "label": "Encoder quality preset", + "sub_setting": True, + "input_type": "select", + "select_options": [ + { + "value": "p1", + "label": "Fastest (P1)", + }, + { + "value": "p2", + "label": "Faster (P2)", + }, + { + "value": "p3", + "label": "Fast (P3)", + }, + { + "value": "p4", + "label": "Medium (P4) - Balanced performance and quality", + }, + { + "value": "p5", + "label": "Slow (P5)", + }, + { + "value": "p6", + "label": "Slower (P6)", + }, + { + "value": "p7", + "label": "Slowest (P7)", + }, + ], + } + self.__set_default_option(values['select_options'], 'nvenc_preset', default_option='p4') + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + return values + + def get_nvenc_tune_form_settings(self): + values = { + "label": "Tune for a particular type of source or situation", + "sub_setting": True, + "input_type": "select", + "select_options": [ + { + "value": "auto", + "label": "Disabled – Do not apply any tune", + }, + { + "value": "hq", + "label": "HQ – High quality (ffmpeg default)", + }, + { + "value": "ll", + "label": "LL – Low latency", + }, + { + "value": "ull", + "label": "ULL – Ultra low latency", + }, + { + "value": "lossless", + "label": "Lossless", + }, + ], + } + self.__set_default_option(values['select_options'], 'nvenc_tune', default_option='auto') + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + return values + + def get_nvenc_profile_form_settings(self): + values = { + "label": "Profile", + "description": "The profile determines which features of the codec are available and enabled,\n" + "while also affecting other restrictions.\n" + "Any of these profiles are capable of 4:2:0, 4:2:2 and 4:4:4, however the support\n" + "depends on the installed hardware.", + "sub_setting": True, + "input_type": "select", + "select_options": [ + { + "value": "auto", + "label": "Auto – Let ffmpeg automatically select the required profile (recommended)", + }, + { + "value": "baseline", + "label": "Baseline", + }, + { + "value": "main", + "label": "Main", + }, + { + "value": "main10", + "label": "Main10", + }, + { + "value": "high444p", + "label": "High444p", + }, + ], + } + self.__set_default_option(values['select_options'], 'nvenc_profile', default_option='main') + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + return values + + def get_nvenc_encoder_ratecontrol_method_form_settings(self): + values = { + "label": "Encoder ratecontrol method", + "description": "Note that the rate control is already defined in the Encoder Quality Preset option.\n" + "Selecting anything other than 'Disabled' will override the preset rate-control.", + "sub_setting": True, + "input_type": "select", + "select_options": [ + { + "value": "auto", + "label": "Auto – Use the rate control setting pre-defined in the preset option (recommended)", + }, + { + "value": "constqp", + "label": "CQP - Quality based mode using constant quantizer scale", + }, + { + "value": "vbr", + "label": "VBR - Bitrate based mode using variable bitrate", + }, + { + "value": "cbr", + "label": "CBR - Bitrate based mode using constant bitrate", + }, + ] + } + self.__set_default_option(values['select_options'], 'nvenc_encoder_ratecontrol_method', default_option='auto') + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + return values + + def get_nvenc_encoder_ratecontrol_lookahead_form_settings(self): + # Lower is better + values = { + "label": "Configure the number of frames to look ahead for rate-control", + "sub_setting": True, + "input_type": "slider", + "slider_options": { + "min": 0, + "max": 30, + }, + } + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + return values + + def get_nvenc_enable_spatial_aq_form_settings(self): + values = { + "label": "Enable Spatial Adaptive Quantization", + "description": "This adjusts the quantization parameter within each frame based on spatial complexity.\n" + "This helps in improving the quality of areas within a frame that are more detailed or complex.", + "sub_setting": True, + } + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = 'hidden' + return values + + def get_nvenc_enable_temporal_aq_form_settings(self): + values = { + "label": "Enable Temporal Adaptive Quantization", + "description": "This adjusts the quantization parameter across frames, based on the motion and temporal complexity.\n" + "This is particularly effective in scenes with varying levels of motion, enhancing quality where it's most needed.\n" + "This option requires Turing or newer hardware.", + "sub_setting": True, + } + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = 'hidden' + return values + + def get_nvenc_aq_strength_form_settings(self): + # Lower is better + values = { + "label": "Strength of the adaptive quantization", + "description": "Controls the strength of the adaptive quantization (both spatial and temporal).\n" + "A higher value indicates stronger adaptation, which can lead to better preservation\n" + "of detail but might also increase the bitrate.", + "sub_setting": True, + "input_type": "slider", + "slider_options": { + "min": 0, + "max": 15, + }, + } + if self.settings.get_setting('mode') not in ['standard']: + values["display"] = "hidden" + if not self.settings.get_setting('nvenc_enable_spatial_aq'): + values["display"] = "hidden" + return values diff --git a/test_driver.py b/test_driver.py new file mode 100644 index 0000000..716965d --- /dev/null +++ b/test_driver.py @@ -0,0 +1,92 @@ +import unittest +from unittest.mock import MagicMock, patch, mock_open +import os +import time +import subprocess +import shutil +import disk_cleaner +import sys +import tempfile + +def run_test(): + with tempfile.TemporaryDirectory() as temp_dir: + print(f"Running test in {temp_dir}") + + log_file = os.path.join(temp_dir, "test_app.log") + rotated_logs = [os.path.join(temp_dir, f"test_app.log.{i}") for i in range(1, 4)] + + # 1. Create Rotated Logs + print("Creating rotated logs...") + for i, path in enumerate(rotated_logs): + with open(path, "w") as f: + f.write("Old data " * 1000) + t = time.time() - (i + 1) * 3600 + os.utime(path, (t, t)) + print(f"Created {os.path.basename(path)} with mtime {t}") + + # 2. Mocking + usage_calls = 0 + def mock_disk_usage(path): + nonlocal usage_calls + # Always return stateful usage for ANY path in this test (since we only care about the one) + usage_calls += 1 + # Call 1: Init scan + # Call 2: Loop scan + # Call 3: Post-Strategy A check + # Call 4: Pre-Strategy B check (inside cleanup_rotated_logs) -> MUST BE HIGH + if usage_calls <= 4: + return MagicMock(total=1000, used=900, free=100) # 90% + # Call 5: After delete 1 + elif usage_calls == 5: + return MagicMock(total=1000, used=850, free=150) # 85% + # Call 6: After delete 2 -> 75% (Stop) + else: + return MagicMock(total=1000, used=750, free=250) # 75% + + disk_cleaner.shutil.disk_usage = mock_disk_usage + + # Mock Partitions -> Return our temp_dir as the ONLY partition + # disk_cleaner.os.path.exists = lambda p: True # REMOVED: Breaks verification! + # We simulate that the temp_dir IS the mountpoint + disk_cleaner.get_partitions = MagicMock(return_value=[('/dev/test', temp_dir)]) + disk_cleaner.get_open_files_flat = MagicMock(return_value=[]) + + # We don't need to mock os.walk if we point it to a real dir! + # We don't need to mock os.stat if we point to a real dir! + # We just need to make sure disk_cleaner uses the REAL functions. + # BUT, previous test run might have left disk_cleaner.os.walk mocked? + # Since we just import disk_cleaner, if it was modified in memory by previous run... + # Wait, this is a script execution. `python test_driver.py`. Fresh process. + # So disk_cleaner is clean. + + # However, we DO need os.stat to behave well for the device check. + # os.stat(temp_dir).st_dev should equal os.stat(root).st_dev. + # Since they are real directories, this works natively! + + # 3. Run Cleaner + print("Running check_and_clean()...") + disk_cleaner.check_and_clean() + + # 4. Verify + # Rotated logs are: + # log.1 (newest) + # log.2 + # log.3 (oldest) + + if not os.path.exists(rotated_logs[2]): + print("SUCCESS: Oldest log (log.3) was deleted.") + else: + print("FAILURE: Oldest log (log.3) still exists.") + + if not os.path.exists(rotated_logs[1]): + print("SUCCESS: 2nd Oldest log (log.2) was deleted.") + else: + print("FAILURE: 2nd Oldest log (log.2) still exists.") + + if os.path.exists(rotated_logs[0]): + print("SUCCESS: Newest log (log.1) was PRESERVED.") + else: + print("FAILURE: Newest log (log.1) was unexpectedly deleted.") + +if __name__ == "__main__": + run_test() diff --git a/test_driver_rate_limit.py b/test_driver_rate_limit.py new file mode 100644 index 0000000..377dffd --- /dev/null +++ b/test_driver_rate_limit.py @@ -0,0 +1,62 @@ +import unittest +from unittest.mock import MagicMock, patch, mock_open +import os +import time +import json +import disk_cleaner +import tempfile +import sys + +def run_test(): + with tempfile.TemporaryDirectory() as temp_dir: + state_file = os.path.join(temp_dir, "cleaner_state.json") + disk_cleaner.STATE_FILE = state_file + + print(f"Testing rate limiting. State file: {state_file}") + + # Mock send_email internals (smtplib) to verify call count + with patch('smtplib.SMTP') as mock_smtp: + # 1. First Call: Should Send + print("First Trigger (expect send)...") + disk_cleaner.send_email("Subject 1", "Body", "/mnt/data") + + if mock_smtp.called: + print("SUCCESS: Email sent.") + else: + print("FAILURE: Email NOT sent.") + mock_smtp.reset_mock() + + # Verify state file + if os.path.exists(state_file): + with open(state_file) as f: + state = json.load(f) + print(f"State: {state}") + if '/mnt/data' in state: + print("SUCCESS: State recorded.") + else: + print("FAILURE: State missing key.") + else: + print("FAILURE: State file not created.") + + # 2. Second Call (Immediate): Should Suppress + print("Second Trigger (Immediate - expect suppress)...") + disk_cleaner.send_email("Subject 2", "Body", "/mnt/data") + + if not mock_smtp.called: + print("SUCCESS: Email suppressed.") + else: + print("FAILURE: Email sent despite cooldown.") + mock_smtp.reset_mock() + + # 3. Third Call (Different Mount): Should Send + print("Third Trigger (Different Mount - expect send)...") + disk_cleaner.send_email("Subject 3", "Body", "/mnt/other") + + if mock_smtp.called: + print("SUCCESS: Email sent (different key).") + else: + print("FAILURE: Email NOT sent for new key.") + mock_smtp.reset_mock() + +if __name__ == "__main__": + run_test() diff --git a/test_driver_warning.py b/test_driver_warning.py new file mode 100644 index 0000000..c69e7fb --- /dev/null +++ b/test_driver_warning.py @@ -0,0 +1,94 @@ +import unittest +from unittest.mock import MagicMock, patch, mock_open +import os +import time +import json +import disk_cleaner +import tempfile +import sys + +def run_test(): + with tempfile.TemporaryDirectory() as temp_dir: + state_file = os.path.join(temp_dir, "cleaner_state.json") + disk_cleaner.STATE_FILE = state_file + + log_file = os.path.join(temp_dir, "big.log") + with open(log_file, "w") as f: + f.write("A" * 1000) + + print(f"Testing warning threshold. State file: {state_file}") + + # Mock send_email internals + with patch('smtplib.SMTP') as mock_smtp: + + # --- Scenario 1: Warning Level (85%) --- + print("\n--- Scenario 1: Warning Level (85%) ---") + + # Mock Usage: 85% + disk_cleaner.shutil.disk_usage = MagicMock(return_value=MagicMock(total=1000, used=850, free=150)) + + # Mock Partitions + disk_cleaner.os.path.exists = lambda p: True + disk_cleaner.get_partitions = MagicMock(return_value=[('/dev/test', temp_dir)]) + + # Mock Open Files + disk_cleaner.get_open_files_flat = MagicMock(return_value=[ + {'path': log_file, 'pid': 1234, 'size': 1024 * 1024 * 50} # 50MB + ]) + + # Run + disk_cleaner.check_and_clean() + + # Verify Email: Context Manager Mock Pattern + smtp_instance = mock_smtp.return_value.__enter__.return_value + + if smtp_instance.send_message.called: + args, _ = smtp_instance.send_message.call_args + msg = args[0] + if "WARNING:" in msg['Subject']: + print("SUCCESS: Warning email sent.") + print(f"Subject: {msg['Subject']}") + if "Suspected largest open file" in msg.get_content(): + print("SUCCESS: Body identifies suspect file.") + else: + print("FAILURE: Body missing suspect file.") + else: + print(f"FAILURE: Wrong subject: {msg['Subject']}") + else: + print("FAILURE: No email sent.") + + mock_smtp.reset_mock() + + # Reset state for next test + os.remove(state_file) + + # --- Scenario 2: Critical Level (96%) --- + print("\n--- Scenario 2: Critical Level (96%) ---") + + # Mock Usage: 96% + disk_cleaner.shutil.disk_usage = MagicMock(return_value=MagicMock(total=1000, used=960, free=40)) + + # Run + disk_cleaner.check_and_clean() + + # Verify Email + smtp_instance = mock_smtp.return_value.__enter__.return_value + + if smtp_instance.send_message.called: + args, _ = smtp_instance.send_message.call_args + msg = args[0] + if "URGENT:" in msg['Subject']: + print("SUCCESS: Urgent email sent.") + else: + print(f"FAILURE: Wrong subject: {msg['Subject']}") + else: + pass + +if __name__ == "__main__": + # Mock is_log_file to be true so Critical clean attempts it + disk_cleaner.is_log_file = MagicMock(return_value=(True, "Yes")) + + # Mock shrink_file_inplace to succeed + disk_cleaner.shrink_file_inplace = MagicMock(return_value=(True, "Shrunk")) + + run_test() diff --git a/test_logger.py b/test_logger.py new file mode 100644 index 0000000..a156a67 --- /dev/null +++ b/test_logger.py @@ -0,0 +1,17 @@ +import time +import os +import sys + +log_file = "test_app.log" + +print(f"Logger starting. PID: {os.getpid()}") +with open(log_file, "w") as f: + i = 0 + while True: + # Write 10KB of data per iteration + data = "X" * 10240 + timestamp = time.strftime("%Y-%m-%d %H:%M:%S") + f.write(f"{timestamp} - Log line {i} - {data}\n") + f.flush() + i += 1 + time.sleep(0.01) diff --git a/time_tracker b/time_tracker new file mode 160000 index 0000000..89e5e8a --- /dev/null +++ b/time_tracker @@ -0,0 +1 @@ +Subproject commit 89e5e8a30345080dd9586f97d7ff78498fb49346 diff --git a/video_optimizer.rb b/video_optimizer.rb new file mode 100644 index 0000000..24c3a63 --- /dev/null +++ b/video_optimizer.rb @@ -0,0 +1,288 @@ +require 'optparse' +require 'fileutils' +require 'open3' +require 'json' + +# Configuration +# Default target is loosely based on user preference, but we use auto-density usually. +options = { + threshold: 5.0, # Default 5 GB/hr + dry_run: false, + quality: 29 # Default CQ +} + +OptionParser.new do |opts| + opts.banner = "Usage: ruby video_optimizer.rb [directory] [options]" + + opts.on("-t", "--threshold N", Float, "Minimum GB per hour to trigger processing (default: 5.0)") do |v| + options[:threshold] = v + end + + opts.on("-n", "--dry-run", "Identify files but do not transcode") do + options[:dry_run] = true + end + + opts.on("-q", "--quality N", Integer, "Set Constant Quality (CQ/CRF) value (Overrides auto-density)") do |v| + options[:cq] = v + options[:mode] = :cq + end + + opts.on("-b", "--bitrate RATE", "Set fixed bitrate (e.g. '2M', '2000k')") do |v| + options[:bitrate] = v + options[:mode] = :bitrate + end + + opts.on("-s", "--target-size GB", Float, "Target file size in GB") do |v| + options[:target_size] = v + options[:mode] = :size + end + + opts.on("-h", "--help", "Prints this help") do + puts opts + exit + end +end.parse! + +# Default mode +options[:mode] ||= :auto_density + +directory = ARGV[0] +if directory.nil? || !Dir.exist?(directory) + puts "Error: Please provide a valid directory." + exit 1 +end + +# --- ENCODER LOGIC --- + +def detect_encoders + stdout, _, _ = Open3.capture3('ffmpeg', '-v', 'quiet', '-encoders') + encoders = [] + encoders << :nvenc if stdout.include?("hevc_nvenc") + encoders << :qsv if stdout.include?("hevc_qsv") + encoders << :cpu if stdout.include?("libx265") + encoders +end + +def select_best_encoder(available_encoders) + if available_encoders.include?(:nvenc) + puts " [Encoder] Selected: NVIDIA (hevc_nvenc)" + return :nvenc + elsif available_encoders.include?(:qsv) + puts " [Encoder] Selected: Intel QSV (hevc_qsv)" + return :qsv + elsif available_encoders.include?(:cpu) + puts " [Encoder] Selected: CPU (libx265)" + return :cpu + else + puts " [Error] No suitable HEVC encoder found!" + exit 1 + end +end + +def build_encoder_args(encoder, mode, target_val) + args = [] + + case encoder + when :nvenc + args << '-c:v' << 'hevc_nvenc' << '-preset' << 'p4' + case mode + when :cq + args << '-rc' << 'constqp' << '-qp' << target_val.to_s + when :bitrate, :size # Both use target bitrate + args << '-rc' << 'vbr' << '-b:v' << target_val << '-maxrate' << target_val << '-bufsize' << (target_val.to_i * 2).to_s + end + + when :qsv + args << '-c:v' << 'hevc_qsv' << '-preset' << 'veryfast' << '-load_plugin' << 'hevc_hw' + case mode + when :cq + # QSV uses -global_quality or -q:v for ICQ. roughly maps to CRF. + args << '-global_quality' << target_val.to_s + when :bitrate, :size + # QSV VBR + args << '-b:v' << target_val << '-maxrate' << target_val + end + + when :cpu + args << '-c:v' << 'libx265' << '-preset' << 'medium' + case mode + when :cq + args << '-crf' << target_val.to_s + when :bitrate, :size + args << '-b:v' << target_val << '-maxrate' << target_val << '-bufsize' << (target_val.to_i * 2).to_s + end + end + + args +end + +# --- METADATA & UTILS --- + +def get_video_metadata(file_path) + cmd = ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', '-show_streams', '-select_streams', 'v:0', file_path] + stdout, stderr, status = Open3.capture3(*cmd) + return nil unless status.success? + + begin + data = JSON.parse(stdout) + format_info = data['format'] + stream_info = data['streams'][0] + return nil unless format_info && stream_info + + size_bytes = format_info['size'].to_i + duration_sec = format_info['duration'].to_f + codec = stream_info['codec_name'] + + fps_str = stream_info['avg_frame_rate'] + if fps_str && fps_str.include?('/') + num, den = fps_str.split('/').map(&:to_f) + fps = (den > 0) ? (num / den) : 30.0 + else + fps = 30.0 + end + + return nil if size_bytes == 0 || duration_sec == 0 + + { size_bytes: size_bytes, duration_sec: duration_sec, codec: codec, fps: fps } + rescue => e + puts "Failed to parse metadata: #{e.message}" + nil + end +end + +def calculate_target_bitrate_from_size(duration_sec, target_gb) + target_bytes = (target_gb * 1024 * 1024 * 1024).to_i + audio_bitrate_bps = 192 * 1000 + estimated_audio_size = duration_sec * (audio_bitrate_bps / 8.0) + target_video_size = target_bytes - estimated_audio_size + return 500_000 if target_video_size <= 0 + (target_video_size * 8) / duration_sec +end + +# --- PROCESS --- + +AVAILABLE_ENCODERS = detect_encoders +BEST_ENCODER = select_best_encoder(AVAILABLE_ENCODERS) + +def process_file(file_path, options) + metadata = get_video_metadata(file_path) + return unless metadata + + accepted_codecs = ['h264', 'avc1'] + return unless accepted_codecs.include?(metadata[:codec].to_s.downcase) + + size_gb = metadata[:size_bytes].to_f / (1024**3) + hours = metadata[:duration_sec] / 3600.0 + gb_per_hour = size_gb / hours + + if gb_per_hour >= options[:threshold] + puts "\n[MATCH] #{file_path}" + puts " Size: #{size_gb.round(2)} GB, Density: #{gb_per_hour.round(2)} GB/hr" + + if options[:dry_run] + puts " [Dry Run] Would transcode this file." + return + end + + # Determine Encode Mode & Target Value + mode = :cq + target_val = 29 # Default CQ + + if options[:mode] == :auto_density + target_gb_hr = options[:threshold] / 2.0 + target_gb = hours * target_gb_hr + bitrate = calculate_target_bitrate_from_size(metadata[:duration_sec], target_gb) + + mode = :bitrate # Use bitrate targeting for density match + target_val = bitrate.to_i.to_s # e.g. "2000000" + puts " Auto-Density Target: #{target_gb.round(2)} GB (#{target_gb_hr} GB/hr) -> #{target_val.to_i/1000} kbps" + + elsif options[:mode] == :size + bitrate = calculate_target_bitrate_from_size(metadata[:duration_sec], options[:target_size]) + mode = :bitrate + target_val = bitrate.to_i.to_s + puts " Target Size: #{options[:target_size]} GB -> #{target_val.to_i/1000} kbps" + + elsif options[:mode] == :bitrate + mode = :bitrate + target_val = options[:bitrate] # User string e.g. "2M" + puts " Target Bitrate: #{target_val}" + + else # :cq + mode = :cq + target_val = options[:cq] || 29 + puts " Target Quality (CQ/CRF): #{target_val}" + end + + # Build flags + encoder_args = build_encoder_args(BEST_ENCODER, mode, target_val) + + # Common Plex/Optimization Args + gop_size = (metadata[:fps] * 2).round + common_args = [ + '-pix_fmt', 'yuv420p', # 8-bit HEVC (Main Profile) + '-g', "#{gop_size}", # Fixed GOP + '-keyint_min', "#{gop_size}", + '-movflags', '+faststart', # Streaming optimization + '-c:a', 'copy' + ] + + original_ext = File.extname(file_path) + temp_file = file_path.sub(original_ext, ".tmp#{original_ext}") + + cmd = ['ffmpeg', '-y', '-hwaccel', 'auto', '-i', file_path] + encoder_args + common_args + [temp_file] + + puts " Starting transcode with #{BEST_ENCODER}..." + start_time = Time.now + + stdout, stderr, status = Open3.capture3(*cmd) + + if status.success? + duration = Time.now - start_time + puts " Done in #{duration.round(1)}s." + + # Validation + if File.exist?(temp_file) && File.size(temp_file) > 1000 + new_size_gb = File.size(temp_file).to_f / (1024**3) + reduction = ((size_gb - new_size_gb) / size_gb * 100.0).round(1) + + puts " New Size: #{new_size_gb.round(2)} GB (#{reduction}% reduction)." + + if reduction < 33.0 + puts " [SKIP] Reduction too low (<33%). Keeping original." + File.delete(temp_file) + else + # Verification + new_meta = get_video_metadata(temp_file) + if new_meta && (new_meta[:duration_sec] - metadata[:duration_sec]).abs < 30 + File.delete(file_path) + File.rename(temp_file, file_path) + puts " [SUCCESS] File replaced." + else + puts " [ERROR] Duration mismatch. Keeping original." + File.delete(temp_file) + end + end + else + puts " [ERROR] Output invalid." + end + else + puts " [ERROR] Transcode failed." + puts stderr # Debug + File.delete(temp_file) if File.exist?(temp_file) + end + end +end + +puts "Scanning #{directory}..." +files = [] +Dir.glob("#{directory}/**/*").each do |f| + next if File.directory?(f) + next unless ['.mp4', '.mkv', '.avi', '.mov', '.m4v'].include?(File.extname(f).downcase) + files << [f, File.size(f)] rescue nil +end + +puts "Sorting #{files.count} files by size..." +files.sort_by! { |_, size| -size } + +files.each { |f, _| process_file(f, options) }