Initial commit: Intelligent Disk Cleaner
This commit is contained in:
468
disk_cleaner.py
Normal file
468
disk_cleaner.py
Normal file
@@ -0,0 +1,468 @@
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import smtplib
|
||||
import re
|
||||
import socket
|
||||
import glob
|
||||
from email.message import EmailMessage
|
||||
|
||||
import json
|
||||
import time
|
||||
|
||||
# --- Configuration ---
# Usage (%) above which automatic cleanup actions (shrink/delete) are taken.
THRESHOLD_PERCENT = 95.0
# Usage (%) above which a warning email is sent but nothing is touched.
WARNING_THRESHOLD_PERCENT = 80.0
# Regex to match somewhat standard log timestamps (e.g. YYYY-MM-DD, MMM DD, ISO8601)
LOG_TIMESTAMP_REGEX = r'(\d{4}-\d{2}-\d{2}|\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})'
# Email settings - placeholders
SMTP_SERVER = "127.0.0.1"
SMTP_PORT = 25
EMAIL_FROM = "diskcleaner@example.com"
EMAIL_TO = ["admins@example.com"]
# Per-mountpoint "last email sent" timestamps persisted between runs.
STATE_FILE = "/tmp/disk_cleaner_state.json"
# Cooldown between two emails for the same mountpoint (8 hours).
RATE_LIMIT_SECONDS = 8 * 3600
|
||||
|
||||
def load_state():
    """Load the persisted rate-limit state from STATE_FILE.

    Returns a dict mapping mountpoint -> unix timestamp of the last
    email sent. Returns an empty dict when the file is missing or
    unreadable: a corrupt state file must never abort a cleanup run.
    """
    try:
        with open(STATE_FILE, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        # First run (or state cleaned up): start fresh, no warning needed.
        pass
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError (corrupt state file).
        # Narrowed from a blanket `except Exception` so programming
        # errors are no longer silently swallowed.
        print(f"Warning: Could not load state file: {e}")
    return {}
|
||||
|
||||
def save_state(state):
    """Persist the rate-limit state dict to STATE_FILE as JSON.

    Failure to save is non-fatal; the worst case is an extra email on a
    later run.
    """
    try:
        with open(STATE_FILE, 'w') as f:
            json.dump(state, f)
    except (OSError, TypeError, ValueError) as e:
        # TypeError: state contains a non-JSON-serializable value.
        # Narrowed from a blanket `except Exception`.
        print(f"Warning: Could not save state file: {e}")
|
||||
|
||||
def should_send_email(mountpoint, state):
    """
    Decide whether an email for *mountpoint* is allowed right now.

    True when no email for this mountpoint has been recorded within the
    cooldown window (RATE_LIMIT_SECONDS, i.e. 8 hours).
    """
    elapsed = time.time() - state.get(mountpoint, 0)
    return elapsed >= RATE_LIMIT_SECONDS
|
||||
|
||||
def record_email_sent(mountpoint, state):
    """Stamp *mountpoint* with the current time and persist the state."""
    now = time.time()
    state[mountpoint] = now
    save_state(state)
|
||||
|
||||
def send_email(subject, body, mountpoint=None):
    """
    Compose and deliver a notification email via the configured SMTP relay.

    When *mountpoint* is given, the per-mountpoint cooldown is consulted
    first (the send is suppressed while it is active) and a successful
    send is recorded back to the state file. Without a mountpoint the
    email is always attempted. Delivery failures are logged, not raised.
    """
    state = None
    if mountpoint:
        state = load_state()
        if not should_send_email(mountpoint, state):
            print(f"Rate limit active for {mountpoint}. Suppressing email: {subject}")
            return

    msg = EmailMessage()
    msg.set_content(body)
    msg['Subject'] = subject
    msg['From'] = EMAIL_FROM
    msg['To'] = ", ".join(EMAIL_TO)

    try:
        # In a real scenario, might need login/auth
        with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as s:
            s.send_message(msg)
            print(f"Email sent: {subject}")

            if mountpoint:
                record_email_sent(mountpoint, state)

    except Exception as e:
        print(f"Failed to send email: {e}")
|
||||
|
||||
def get_partitions(mounts_path='/proc/mounts'):
    """
    Parse a mounts table (by default /proc/mounts) and return the real,
    device-backed filesystems.

    Returns a list of (device, mountpoint) tuples.  (The original
    docstring claimed 3-tuples including fstype, but only 2-tuples were
    ever appended; the documentation is corrected here.)

    Pseudo-filesystems (proc, sysfs, tmpfs, ...) and sources not under
    /dev/ are filtered out. Returns [] when the table is missing or
    unreadable, e.g. on non-Linux platforms.

    The *mounts_path* parameter is a backward-compatible generalization
    that allows testing against a synthetic mounts file.
    """
    # Pseudo-filesystems we never want to scan or clean.
    skip_fstypes = ('proc', 'sysfs', 'devtmpfs', 'devpts', 'tmpfs', 'cgroup', 'squashfs')

    partitions = []
    if not os.path.exists(mounts_path):
        # Fallback for non-Linux or testing environments without /proc mocks
        return []

    try:
        with open(mounts_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 3:
                    device, mountpoint, fstype = parts[0], parts[1], parts[2]
                    # Filter out pseudo-filesystems
                    if fstype not in skip_fstypes:
                        # rudimentary check: usually we want physical devices or LVM
                        if device.startswith('/dev/'):
                            partitions.append((device, mountpoint))
    except OSError as e:
        # Narrowed from `except Exception`: only I/O errors are expected here.
        print(f"Error reading {mounts_path}: {e}")
    return partitions
|
||||
|
||||
def get_process_name_pid(pid):
    """
    Return the short command name of *pid* from /proc/[pid]/comm,
    or "unknown" when the process is gone or unreadable.
    """
    try:
        with open(f'/proc/{pid}/comm', 'r') as f:
            return f.read().strip()
    except OSError:
        # Narrowed from a bare `except:` — only filesystem errors
        # (process exited, permission denied, non-Linux host) are expected.
        return "unknown"
|
||||
|
||||
def get_open_files_flat():
    """
    Walk /proc/[pid]/fd for every running process and collect the
    regular files they hold open.

    Returns a list of dicts: {'path': str, 'pid': int, 'size': int}.
    Processes or fds that vanish (or are unreadable) mid-scan are
    skipped silently; on platforms without /proc an empty list is
    returned.
    """
    open_files = []
    if not os.path.exists('/proc'):
        # Non-Linux platform: no process information available.
        return []

    for pid_dir in os.listdir('/proc'):
        if not pid_dir.isdigit():
            continue  # not a PID entry (e.g. /proc/meminfo)

        pid = int(pid_dir)
        fd_dir = f'/proc/{pid}/fd'

        try:
            # The process may exit between listdir('/proc') and here.
            # Broadened from (FileNotFoundError, PermissionError) to
            # OSError so other race errors (e.g. ESRCH) are also skipped
            # instead of crashing the whole scan.
            fds = os.listdir(fd_dir)
        except OSError:
            continue

        for fd in fds:
            try:
                # Each fd entry is a symlink to the open target.
                link_path = os.path.join(fd_dir, fd)
                real_path = os.readlink(link_path)

                # Keep only absolute paths to regular files
                # (skips pipes, sockets, and "(deleted)" targets).
                if real_path.startswith('/') and os.path.isfile(real_path):
                    size = os.path.getsize(real_path)
                    open_files.append({
                        'path': real_path,
                        'pid': pid,
                        'size': size
                    })
            except OSError:
                # fd closed or target vanished mid-scan.
                continue

    return open_files
|
||||
|
||||
def is_log_file(file_path):
    """
    Heuristically decide whether *file_path* is an active log file.

    Two-step check: the path must contain 'log' (case-insensitive), and
    the first 4 KiB of content must contain a timestamp-like pattern.
    Returns a (is_log, reason) tuple.
    """
    if "log" not in file_path.lower():
        return False, "Filename does not contain 'log'"

    try:
        with open(file_path, 'r', errors='ignore') as fh:
            header = fh.read(4096)
    except Exception as e:
        return False, f"Read error: {e}"

    if re.search(LOG_TIMESTAMP_REGEX, header):
        return True, "Found timestamps"
    return False, "No timestamps found in header"
|
||||
|
||||
def shrink_file_inplace(file_path):
    """
    Drop the first half of *file_path* in place.

    Slides the second half of the file to the front in chunks (so the
    inode — and any process holding the file open — is preserved), then
    truncates. Returns a (success, message) tuple.
    """
    try:
        total = os.path.getsize(file_path)
        if total == 0:
            return False, "File is empty"

        half = total // 2
        step = 1024 * 1024 * 10  # 10MB chunks

        print(f"Shrinking {file_path} ({total} bytes). Removing first {half} bytes.")

        with open(file_path, "r+b") as fh:
            src = half   # where we read the surviving data from
            dst = 0      # where we write it back to

            while src < total:
                fh.seek(src)
                chunk = fh.read(step)
                if not chunk:
                    break

                fh.seek(dst)
                fh.write(chunk)

                src += len(chunk)
                dst += len(chunk)

            # Cut off the now-duplicated tail.
            fh.truncate(dst)

        print(f"Successfully shrunk {file_path} to {dst} bytes.")
        return True, f"Removed first {half} bytes. New size: {dst}"
    except Exception as e:
        return False, f"Error shrinking file: {e}"
|
||||
|
||||
|
||||
def is_rotated_log(filename):
    """
    Heuristic: does *filename* look like a rotated/archived log?

    Recognized patterns:
      - compressed/backup extensions: .gz, .zip, .tar, .bz2, .old, .bak
      - logrotate numeric suffixes:   name.log.1, name.log.2, ...
      - 8-digit date stamps bounded by -, _ or .: access.log-20240101
    """
    lowered = filename.lower()

    # Compressed or backup extensions.
    if lowered.endswith(('.gz', '.zip', '.tar', '.bz2', '.old', '.bak')):
        return True

    # logrotate-style numeric suffix.
    if re.search(r'\.log\.\d+$', filename, re.IGNORECASE):
        return True

    # Date-stamped rotation. Kept deliberately loose, but the required
    # separator keeps plain 8-digit numbers inside names from matching.
    return re.search(r'[-_.]\d{8}([-_.]|$)', filename) is not None
|
||||
|
||||
def find_rotated_logs(mountpoint):
    """
    Recursively scan *mountpoint* for rotated log files without
    crossing filesystem boundaries.

    Returns a list of (path, size, mtime) tuples sorted oldest-first,
    so callers delete the least recent archives first.
    """
    print(f"Scanning {mountpoint} for rotated logs...")

    try:
        mount_dev = os.stat(mountpoint).st_dev
    except OSError:
        return []

    found = []
    for root, dirs, files in os.walk(mountpoint):
        try:
            same_fs = os.stat(root).st_dev == mount_dev
        except OSError:
            continue
        if not same_fs:
            # Entered a different filesystem: prune traversal and back out.
            dirs[:] = []
            continue

        for name in files:
            if not is_rotated_log(name):
                continue
            full_path = os.path.join(root, name)
            try:
                st = os.stat(full_path)
            except OSError:
                continue
            found.append((full_path, st.st_size, st.st_mtime))

    # Oldest modification time first.
    found.sort(key=lambda entry: entry[2])
    return found
|
||||
|
||||
def check_disk_usage_percent(mountpoint):
    """
    Return used space on *mountpoint* as a percentage of total capacity.

    An unstatable mountpoint is reported as 100.0 (i.e. treated as
    full), which biases the caller toward caution.
    """
    try:
        stats = shutil.disk_usage(mountpoint)
    except OSError:
        return 100.0
    return (stats.used / stats.total) * 100
|
||||
|
||||
def cleanup_rotated_logs(mountpoint, hostname):
    """
    Delete the oldest rotated logs on *mountpoint* until usage drops
    below WARNING_THRESHOLD_PERCENT (80%), then email a summary.

    Returns True if at least one file was deleted, False otherwise.
    """
    candidates = find_rotated_logs(mountpoint)
    deleted_count = 0
    deleted_bytes = 0
    deleted_files = []

    current_usage = check_disk_usage_percent(mountpoint)

    for path, size, mtime in candidates:
        # BUGFIX: this previously compared against THRESHOLD_PERCENT
        # (95%), contradicting the documented goal of cleaning "until
        # usage < 80%". Stopping right at the critical threshold would
        # leave the volume critical again almost immediately.
        if current_usage <= WARNING_THRESHOLD_PERCENT:
            break

        print(f"Deleting old rotated log: {path} ({size} bytes, mtime: {mtime})")
        try:
            os.remove(path)
            deleted_count += 1
            deleted_bytes += size

            # Re-check usage after each deletion so we stop as soon as
            # the target is reached.
            current_usage = check_disk_usage_percent(mountpoint)
            deleted_files.append(os.path.basename(path))

        except OSError as e:
            print(f"Failed to delete {path}: {e}")

    if deleted_count > 0:
        subject = f"URGENT: Rotated Log Cleanup - {hostname} - {mountpoint}"
        # Truncate list if too long
        file_list_str = ", ".join(deleted_files[:10])
        if len(deleted_files) > 10:
            file_list_str += f" and {len(deleted_files)-10} others"

        body = (f"Volume {mountpoint} was full.\n"
                f"Action: Deleted {deleted_count} old rotated log files.\n"
                f"Total freed: {deleted_bytes / 1024 / 1024:.2f} MB.\n"
                f"Files: {file_list_str}\n"
                f"Current Usage: {current_usage:.1f}%")
        send_email(subject, body, mountpoint)
        return True

    return False
|
||||
|
||||
def check_and_clean():
    """Main orchestration pass: scan all mounts, shrink or delete logs.

    Flow: enumerate device-backed partitions, classify each as critical
    (> THRESHOLD_PERCENT) or warning (> WARNING_THRESHOLD_PERCENT), then
    for critical volumes try Strategy A (shrink the largest open log
    in-place) and fall back to Strategy B (delete rotated logs). Warning
    volumes only get a notification email — nothing is modified.
    """
    if os.name == 'nt':
        print("Note: This script is designed for Linux (/proc). Windows execution will miss process data.")

    hostname = socket.gethostname()

    # 1. Get Partitions
    partitions = get_partitions()
    if not partitions:
        print("No partitions found via /proc/mounts. (Are you on Windows?)")
        # Degrade gracefully: assume a single root volume so the usage
        # check below can still run.
        partitions = [('/dev/root', '/')]

    # 2. Identify Metadata
    critical_partitions = []
    warning_partitions = []

    for device, mountpoint in partitions:
        try:
            percent = check_disk_usage_percent(mountpoint)
        except OSError:
            continue

        if percent > THRESHOLD_PERCENT:
            print(f"CRITICAL: Volume {mountpoint} ({device}) is at {percent:.1f}% usage.")
            critical_partitions.append(mountpoint)
        elif percent > WARNING_THRESHOLD_PERCENT:
            print(f"WARNING: Volume {mountpoint} ({device}) is at {percent:.1f}% usage.")
            warning_partitions.append(mountpoint)

    if not critical_partitions and not warning_partitions:
        print("All volumes are healthy.")
        return

    # 3. Found partitions. Now scan processes.
    # The /proc scan is done once and reused for every partition below.
    print("High usage detected. Scanning /proc for open files...")
    all_open_files = get_open_files_flat()

    # --- PROCESS CRITICAL ---
    for mountpoint in critical_partitions:
        # Re-check: an earlier iteration's cleanup may have freed space here.
        current_percent = check_disk_usage_percent(mountpoint)
        if current_percent <= THRESHOLD_PERCENT:
            continue

        # Strategy A: Shrink Open Files
        # Select the open files that live on this mountpoint.
        candidates = []
        for file_info in all_open_files:
            path = file_info['path']
            if mountpoint == '/':
                # Prefix matching is useless for '/': compare device IDs instead.
                try:
                    if os.stat(path).st_dev == os.stat(mountpoint).st_dev:
                        candidates.append(file_info)
                except OSError:
                    pass
            else:
                if path.startswith(mountpoint):
                    candidates.append(file_info)

        # Largest files first — best payoff per shrink.
        candidates.sort(key=lambda x: x['size'], reverse=True)

        shrunk_something = False
        for candidate in candidates:
            path = candidate['path']
            size = candidate['size']

            is_log, reason = is_log_file(path)
            if is_log:
                pid = candidate['pid']
                proc_name = get_process_name_pid(pid)
                print(f"Found candidate: {path} ({size} bytes), held by {proc_name} (PID {pid})")

                success, msg = shrink_file_inplace(path)
                if success:
                    subject = f"URGENT: Disk Cleanup Action - {hostname} - {mountpoint}"
                    body = (f"Volume {mountpoint} was >{THRESHOLD_PERCENT}%.\n"
                            f"Identified large log file: {path}\n"
                            f"Process holding file: {proc_name} (PID {pid})\n"
                            f"Action: {msg}\n")
                    send_email(subject, body, mountpoint)
                    shrunk_something = True
                    # NOTE(review): at most ONE file is shrunk per run
                    # before falling through to the usage re-check below
                    # — presumably intentional (shrink biggest, then
                    # reassess); confirm before changing.
                    break  # Re-evaluate usage

        # Check if Strategy A was enough
        if check_disk_usage_percent(mountpoint) <= THRESHOLD_PERCENT:
            print(f"Volume {mountpoint} is now safe.")
            continue

        # Strategy B: Rotated Logs Fallback
        print(f"Active log shrinking insufficient or unavailable. Checking for rotated logs on {mountpoint}...")
        cleanup_success = cleanup_rotated_logs(mountpoint, hostname)

        # Both strategies failed: escalate with a manual-action email.
        if not cleanup_success and not shrunk_something:
            print(f"No suitable log file found to clean on {mountpoint}.")
            subject = f"CRITICAL: Disk Full - {hostname} - {mountpoint}"

            suspected_culprit = "Unknown"
            if candidates:
                # Candidates are sorted descending by size: [0] is the largest.
                top_cand = candidates[0]
                suspected_culprit = f"{top_cand['path']} ({top_cand['size'] / 1024 / 1024:.2f} MB)"

            body = (f"Volume {mountpoint} is >{THRESHOLD_PERCENT}%.\n"
                    f"Could not find any suitable open log files or rotated logs to clean automatically.\n"
                    f"Suspected largest open file: {suspected_culprit}\n"
                    f"Usage is still {check_disk_usage_percent(mountpoint):.1f}%.")
            send_email(subject, body, mountpoint)

    # --- PROCESS WARNINGS ---
    for mountpoint in warning_partitions:
        # Find culprits but DO NOT TOUCH
        # (same mountpoint-filter logic as the critical path above)
        candidates = []
        for file_info in all_open_files:
            path = file_info['path']
            if mountpoint == '/':
                try:
                    if os.stat(path).st_dev == os.stat(mountpoint).st_dev:
                        candidates.append(file_info)
                except OSError:
                    pass
            else:
                if path.startswith(mountpoint):
                    candidates.append(file_info)

        candidates.sort(key=lambda x: x['size'], reverse=True)

        suspected_culprit = "Unknown"
        if candidates:
            top_cand = candidates[0]
            suspected_culprit = f"{top_cand['path']} ({top_cand['size'] / 1024 / 1024:.2f} MB)"

        subject = f"WARNING: Disk Usage High - {hostname} - {mountpoint}"
        body = (f"Volume {mountpoint} is >{WARNING_THRESHOLD_PERCENT}% (Current: {check_disk_usage_percent(mountpoint):.1f}%).\n"
                f"Threshold for automatic cleanup is {THRESHOLD_PERCENT}%.\n"
                f"Suspected largest open file: {suspected_culprit}\n"
                f"Please investigate.")
        send_email(subject, body, mountpoint)
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: run a single scan/cleanup pass and exit.
    check_and_clean()
|
||||
Reference in New Issue
Block a user