Initial commit: Intelligent Disk Cleaner

This commit is contained in:
2026-02-03 14:36:25 -05:00
commit ec7625d4d9
11 changed files with 1769 additions and 0 deletions

468
disk_cleaner.py Normal file
View File

@@ -0,0 +1,468 @@
import os
import sys
import shutil
import smtplib
import re
import socket
import glob
from email.message import EmailMessage
import json
import time
# --- Configuration ---
# Usage percentage above which automatic cleanup actions are taken.
THRESHOLD_PERCENT = 95.0
# Usage percentage above which a warning email is sent (no cleanup performed).
WARNING_THRESHOLD_PERCENT = 80.0
# Regex to match somewhat standard log timestamps (e.g. YYYY-MM-DD, MMM DD, ISO8601)
LOG_TIMESTAMP_REGEX = r'(\d{4}-\d{2}-\d{2}|\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})'
# Email settings - placeholders
SMTP_SERVER = "127.0.0.1"  # local relay; no authentication is attempted
SMTP_PORT = 25
EMAIL_FROM = "diskcleaner@example.com"
EMAIL_TO = ["admins@example.com"]
# JSON file recording the last-notification timestamp per mountpoint.
STATE_FILE = "/tmp/disk_cleaner_state.json"
# Minimum seconds between emails for the same mountpoint (8 hours).
RATE_LIMIT_SECONDS = 8 * 3600
def load_state():
    """Return the persisted rate-limit state as a dict, or {} on any failure."""
    if not os.path.exists(STATE_FILE):
        return {}
    try:
        with open(STATE_FILE, 'r') as fh:
            return json.load(fh)
    except Exception as err:
        # Best-effort: a corrupt/unreadable state file only disables rate limiting.
        print(f"Warning: Could not load state file: {err}")
        return {}
def save_state(state):
    """Best-effort persist of the rate-limit state dict to STATE_FILE."""
    try:
        serialized = json.dumps(state)
        with open(STATE_FILE, 'w') as fh:
            fh.write(serialized)
    except Exception as err:
        # Non-fatal: losing the state only means an extra email later.
        print(f"Warning: Could not save state file: {err}")
def should_send_email(mountpoint, state):
    """Return True unless an email for *mountpoint* was sent within the cooldown.

    *state* maps mountpoints to the UNIX time of their last notification;
    an unknown mountpoint (default 0) is always eligible.
    """
    elapsed = time.time() - state.get(mountpoint, 0)
    return elapsed >= RATE_LIMIT_SECONDS
def record_email_sent(mountpoint, state):
    """Stamp *mountpoint* with the current time in *state* and persist it."""
    state[mountpoint] = time.time()
    save_state(state)
def send_email(subject, body, mountpoint=None):
    """Send a notification email, honoring the per-mountpoint cooldown.

    When *mountpoint* is given, the message is suppressed if a mail for that
    mountpoint already went out within RATE_LIMIT_SECONDS, and a successful
    send is recorded back into the state file.
    """
    state = None
    if mountpoint:
        state = load_state()
        if not should_send_email(mountpoint, state):
            print(f"Rate limit active for {mountpoint}. Suppressing email: {subject}")
            return

    message = EmailMessage()
    message.set_content(body)
    message['Subject'] = subject
    message['From'] = EMAIL_FROM
    message['To'] = ", ".join(EMAIL_TO)

    try:
        # In a real scenario, might need login/auth
        with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as conn:
            conn.send_message(message)
            print(f"Email sent: {subject}")
            if mountpoint:
                record_email_sent(mountpoint, state)
    except Exception as err:
        # Mail failure must never abort the cleanup run.
        print(f"Failed to send email: {err}")
def get_partitions():
    """Parse /proc/mounts and return a list of (device, mountpoint) pairs.

    Pseudo-filesystems (proc, sysfs, tmpfs, ...) are skipped and only
    mounts backed by a /dev/ device (physical disks, LVM) are kept.
    Returns [] on non-Linux systems or on read errors.
    """
    mounts = []
    if not os.path.exists('/proc/mounts'):
        # Non-Linux or a test environment without a /proc mock.
        return []
    skip_fstypes = ('proc', 'sysfs', 'devtmpfs', 'devpts', 'tmpfs', 'cgroup', 'squashfs')
    try:
        with open('/proc/mounts', 'r') as fh:
            for entry in fh:
                fields = entry.strip().split()
                if len(fields) < 3:
                    continue
                device, mountpoint, fstype = fields[0], fields[1], fields[2]
                # Rudimentary filter: real block devices / LVM only.
                if fstype not in skip_fstypes and device.startswith('/dev/'):
                    mounts.append((device, mountpoint))
    except Exception as err:
        print(f"Error reading /proc/mounts: {err}")
    return mounts
def get_process_name_pid(pid):
    """Return the short command name for *pid* from /proc/<pid>/comm.

    Falls back to "unknown" when the process has exited, the entry is
    unreadable, or *pid* produces an invalid path.
    """
    try:
        with open(f'/proc/{pid}/comm', 'r') as f:
            return f.read().strip()
    # Narrowed from a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit. ValueError covers malformed paths
    # (e.g. embedded NULs); OSError covers vanished/forbidden processes.
    except (OSError, ValueError):
        return "unknown"
def get_open_files_flat():
    """Scan /proc/<pid>/fd for every process and collect open regular files.

    Returns a flat list of dicts with keys 'path', 'pid', and 'size'.
    Pipes, sockets, and deleted files are excluded; processes that vanish
    or deny access mid-scan are skipped silently.
    """
    results = []
    if not os.path.exists('/proc'):
        return results
    for entry in os.listdir('/proc'):
        # Only numeric entries under /proc are process directories.
        if not entry.isdigit():
            continue
        pid = int(entry)
        fd_dir = f'/proc/{pid}/fd'
        try:
            fd_names = os.listdir(fd_dir)
        except (FileNotFoundError, PermissionError):
            # Process exited or we lack privileges; move on.
            continue
        for fd_name in fd_names:
            try:
                target = os.readlink(os.path.join(fd_dir, fd_name))
                # Non-file descriptors resolve to "pipe:[...]"/"socket:[...]",
                # which do not start with '/'.
                if target.startswith('/') and os.path.isfile(target):
                    results.append({
                        'path': target,
                        'pid': pid,
                        'size': os.path.getsize(target),
                    })
            except (OSError, FileNotFoundError):
                continue
    return results
def is_log_file(file_path):
    """Heuristically decide whether *file_path* is an active log file.

    Two checks: the path must contain "log" (case-insensitive), and the
    first 4 KiB of content must contain a timestamp-like pattern.
    Returns a (bool, reason) tuple.
    """
    if "log" not in file_path.lower():
        return False, "Filename does not contain 'log'"
    try:
        with open(file_path, 'r', errors='ignore') as fh:
            head = fh.read(4096)
    except Exception as err:
        return False, f"Read error: {err}"
    if re.search(LOG_TIMESTAMP_REGEX, head):
        return True, "Found timestamps"
    return False, "No timestamps found in header"
def shrink_file_inplace(file_path):
    """Drop the first half of *file_path* in place via a chunked copy-down.

    The second half of the file is copied over the first half in 10 MB
    chunks and the file is truncated, so memory use stays bounded and the
    inode is preserved (important for files held open by other processes).
    Returns a (success, message) tuple.
    """
    try:
        total = os.path.getsize(file_path)
        if total == 0:
            return False, "File is empty"
        midpoint = total // 2
        step = 1024 * 1024 * 10  # 10MB chunks
        print(f"Shrinking {file_path} ({total} bytes). Removing first {midpoint} bytes.")
        with open(file_path, "r+b") as fh:
            src = midpoint
            dst = 0
            while src < total:
                fh.seek(src)
                block = fh.read(step)
                if not block:
                    break
                fh.seek(dst)
                fh.write(block)
                src += len(block)
                dst += len(block)
            fh.truncate(dst)
            print(f"Successfully shrunk {file_path} to {dst} bytes.")
            return True, f"Removed first {midpoint} bytes. New size: {dst}"
    except Exception as e:
        return False, f"Error shrinking file: {e}"
def is_rotated_log(filename):
    """Return True when *filename* matches common rotated-log naming schemes.

    Recognized patterns:
      - compressed/backup extensions: .gz, .zip, .tar, .bz2, .old, .bak
      - numeric rotation suffixes: .log.1, .log.2, ...
      - 8-digit date stamps bounded by separators: -20240101, _20240101.
    """
    lowered = filename.lower()
    if lowered.endswith(('.gz', '.zip', '.tar', '.bz2', '.old', '.bak')):
        return True
    if re.search(r'\.log\.\d+$', filename, re.IGNORECASE):
        return True
    # Deliberately loose date match; kept bounded by [-_.] so arbitrary
    # digit runs inside names don't trigger it.
    return bool(re.search(r'[-_.]\d{8}([-_.]|$)', filename))
def find_rotated_logs(mountpoint):
    """Walk *mountpoint* and collect rotated log files on that filesystem.

    Traversal never crosses into other filesystems (device-ID check per
    directory). Returns (path, size, mtime) tuples sorted oldest-first,
    or [] if the mountpoint cannot be stat'ed.
    """
    found = []
    print(f"Scanning {mountpoint} for rotated logs...")
    try:
        fs_dev = os.stat(mountpoint).st_dev
    except OSError:
        return []
    for root, dirs, names in os.walk(mountpoint):
        try:
            if os.stat(root).st_dev != fs_dev:
                # Different filesystem: prune traversal below this directory.
                dirs[:] = []
                continue
        except OSError:
            continue
        for name in names:
            if not is_rotated_log(name):
                continue
            full_path = os.path.join(root, name)
            try:
                info = os.stat(full_path)
            except OSError:
                continue
            found.append((full_path, info.st_size, info.st_mtime))
    # Oldest first, so deletions start with the least valuable files.
    found.sort(key=lambda item: item[2])
    return found
def check_disk_usage_percent(mountpoint):
    """Return used-space percentage for *mountpoint*; 100.0 if unreadable.

    Reporting an unreachable mount as full ensures it gets attention
    rather than being silently skipped.
    """
    try:
        usage = shutil.disk_usage(mountpoint)
    except OSError:
        return 100.0
    return (usage.used / usage.total) * 100
def cleanup_rotated_logs(mountpoint, hostname):
    """Delete rotated logs (oldest first) until usage drops below 80%.

    Cleans down to WARNING_THRESHOLD_PERCENT rather than the 95% trigger:
    stopping exactly at the critical threshold would leave the volume one
    write away from re-triggering, so the lower target provides hysteresis
    (and matches this function's documented intent). Sends a summary email
    when anything was deleted.

    Returns True if at least one file was removed, False otherwise.
    """
    candidates = find_rotated_logs(mountpoint)
    deleted_count = 0
    deleted_bytes = 0
    deleted_files = []
    current_usage = check_disk_usage_percent(mountpoint)
    for path, size, mtime in candidates:
        # Stop once usage is back under the warning threshold.
        # (Bug fix: previously compared against THRESHOLD_PERCENT, which
        # stopped cleanup at 95% — still effectively full.)
        if current_usage <= WARNING_THRESHOLD_PERCENT:
            break
        print(f"Deleting old rotated log: {path} ({size} bytes, mtime: {mtime})")
        try:
            os.remove(path)
            deleted_count += 1
            deleted_bytes += size
            # Re-check usage after every deletion so we stop early.
            current_usage = check_disk_usage_percent(mountpoint)
            deleted_files.append(os.path.basename(path))
        except OSError as e:
            print(f"Failed to delete {path}: {e}")
    if deleted_count > 0:
        subject = f"URGENT: Rotated Log Cleanup - {hostname} - {mountpoint}"
        # Truncate list if too long
        file_list_str = ", ".join(deleted_files[:10])
        if len(deleted_files) > 10:
            file_list_str += f" and {len(deleted_files)-10} others"
        body = (f"Volume {mountpoint} was full.\n"
                f"Action: Deleted {deleted_count} old rotated log files.\n"
                f"Total freed: {deleted_bytes / 1024 / 1024:.2f} MB.\n"
                f"Files: {file_list_str}\n"
                f"Current Usage: {current_usage:.1f}%")
        send_email(subject, body, mountpoint)
        return True
    return False
def check_and_clean():
    """Main orchestration: scan all mounted volumes and act on full ones.

    Flow:
      1. Enumerate real partitions from /proc/mounts.
      2. Classify each volume as critical (>THRESHOLD_PERCENT) or
         warning (>WARNING_THRESHOLD_PERCENT).
      3. Critical volumes: Strategy A shrinks the largest open log file
         in place; if still critical, Strategy B deletes rotated logs;
         if neither worked, escalate by email.
      4. Warning volumes: identify the largest open file and email only —
         no files are touched.
    """
    if os.name == 'nt':
        print("Note: This script is designed for Linux (/proc). Windows execution will miss process data.")
    hostname = socket.gethostname()
    # 1. Get Partitions
    partitions = get_partitions()
    if not partitions:
        print("No partitions found via /proc/mounts. (Are you on Windows?)")
        # Fall back to a single root volume so the usage check still runs.
        partitions = [('/dev/root', '/')]
    # 2. Identify Metadata
    critical_partitions = []
    warning_partitions = []
    for device, mountpoint in partitions:
        try:
            percent = check_disk_usage_percent(mountpoint)
        except OSError:
            continue
        if percent > THRESHOLD_PERCENT:
            print(f"CRITICAL: Volume {mountpoint} ({device}) is at {percent:.1f}% usage.")
            critical_partitions.append(mountpoint)
        elif percent > WARNING_THRESHOLD_PERCENT:
            print(f"WARNING: Volume {mountpoint} ({device}) is at {percent:.1f}% usage.")
            warning_partitions.append(mountpoint)
    if not critical_partitions and not warning_partitions:
        print("All volumes are healthy.")
        return
    # 3. Found partitions. Now scan processes.
    # The /proc scan is done once and reused for every flagged volume.
    print("High usage detected. Scanning /proc for open files...")
    all_open_files = get_open_files_flat()
    # --- PROCESS CRITICAL ---
    for mountpoint in critical_partitions:
        current_percent = check_disk_usage_percent(mountpoint)
        if current_percent <= THRESHOLD_PERCENT:
            # Usage may have dropped since classification (e.g. a previous
            # iteration's cleanup freed shared space).
            continue
        # Strategy A: Shrink Open Files
        candidates = []
        for file_info in all_open_files:
            path = file_info['path']
            if mountpoint == '/':
                # Every absolute path "starts with" '/', so prefix matching is
                # useless for the root volume; compare device IDs instead.
                try:
                    if os.stat(path).st_dev == os.stat(mountpoint).st_dev:
                        candidates.append(file_info)
                except OSError:
                    pass
            else:
                if path.startswith(mountpoint):
                    candidates.append(file_info)
        # Largest first: shrinking the biggest log frees the most space.
        candidates.sort(key=lambda x: x['size'], reverse=True)
        shrunk_something = False
        for candidate in candidates:
            path = candidate['path']
            size = candidate['size']
            is_log, reason = is_log_file(path)
            if is_log:
                pid = candidate['pid']
                proc_name = get_process_name_pid(pid)
                print(f"Found candidate: {path} ({size} bytes), held by {proc_name} (PID {pid})")
                success, msg = shrink_file_inplace(path)
                if success:
                    subject = f"URGENT: Disk Cleanup Action - {hostname} - {mountpoint}"
                    body = (f"Volume {mountpoint} was >{THRESHOLD_PERCENT}%.\n"
                            f"Identified large log file: {path}\n"
                            f"Process holding file: {proc_name} (PID {pid})\n"
                            f"Action: {msg}\n")
                    send_email(subject, body, mountpoint)
                    shrunk_something = True
                    break  # Re-evaluate usage
        # Check if Strategy A was enough
        if check_disk_usage_percent(mountpoint) <= THRESHOLD_PERCENT:
            print(f"Volume {mountpoint} is now safe.")
            continue
        # Strategy B: Rotated Logs Fallback
        print(f"Active log shrinking insufficient or unavailable. Checking for rotated logs on {mountpoint}...")
        cleanup_success = cleanup_rotated_logs(mountpoint, hostname)
        if not cleanup_success and not shrunk_something:
            # Nothing could be cleaned automatically: escalate to humans
            # with the most likely culprit included.
            print(f"No suitable log file found to clean on {mountpoint}.")
            subject = f"CRITICAL: Disk Full - {hostname} - {mountpoint}"
            suspected_culprit = "Unknown"
            if candidates:
                top_cand = candidates[0]
                suspected_culprit = f"{top_cand['path']} ({top_cand['size'] / 1024 / 1024:.2f} MB)"
            body = (f"Volume {mountpoint} is >{THRESHOLD_PERCENT}%.\n"
                    f"Could not find any suitable open log files or rotated logs to clean automatically.\n"
                    f"Suspected largest open file: {suspected_culprit}\n"
                    f"Usage is still {check_disk_usage_percent(mountpoint):.1f}%.")
            send_email(subject, body, mountpoint)
    # --- PROCESS WARNINGS ---
    for mountpoint in warning_partitions:
        # Find culprits but DO NOT TOUCH
        candidates = []
        for file_info in all_open_files:
            path = file_info['path']
            if mountpoint == '/':
                # Same device-ID comparison as above for the root volume.
                try:
                    if os.stat(path).st_dev == os.stat(mountpoint).st_dev:
                        candidates.append(file_info)
                except OSError:
                    pass
            else:
                if path.startswith(mountpoint):
                    candidates.append(file_info)
        candidates.sort(key=lambda x: x['size'], reverse=True)
        suspected_culprit = "Unknown"
        if candidates:
            top_cand = candidates[0]
            suspected_culprit = f"{top_cand['path']} ({top_cand['size'] / 1024 / 1024:.2f} MB)"
        subject = f"WARNING: Disk Usage High - {hostname} - {mountpoint}"
        body = (f"Volume {mountpoint} is >{WARNING_THRESHOLD_PERCENT}% (Current: {check_disk_usage_percent(mountpoint):.1f}%).\n"
                f"Threshold for automatic cleanup is {THRESHOLD_PERCENT}%.\n"
                f"Suspected largest open file: {suspected_culprit}\n"
                f"Please investigate.")
        send_email(subject, body, mountpoint)
if __name__ == "__main__":
    # Script entry point: run a single scan-and-clean pass.
    check_and_clean()