Initial commit of wif2ansible

This commit is contained in:
2026-02-06 15:12:49 -05:00
commit aa299df41e
13 changed files with 1025 additions and 0 deletions

20
.gitignore vendored Normal file
View File

@@ -0,0 +1,20 @@
# Excel Files (Data)
*.xls
*.xlsx
# Generated Inventories (Contain Sensitive IPs)
inventory_*.yml
*.yml
# Python Build Artifacts
dist/
build/
*.spec
__pycache__/
*.pyc
*.egg-info/
.pytest_cache/
# IDE settings
.vscode/
.idea/

58
README.md Normal file
View File

@@ -0,0 +1,58 @@
# WIF to Ansible Inventory Converter
This tool converts **Workload Intake Form (WIF)** Excel documents into **Ansible Inventory** YAML files.
## Features
- **Robust Excel Parsing**: Automatically handles shifted cells and ignores hidden headers/rows/columns.
- **Server Filtering**: Only generates flows for servers explicitly defined in the 'Servers' tab.
- **Strict Validation**: Dropped flows are reported in the console output.
## How to Run
### Option 1: Standalone Executable (Windows)
No Python installation required.
1. Open a terminal in the project root (the folder that contains `dist`).
2. Run the executable from the command line, providing your WIF Excel file as an argument:
```powershell
.\dist\wif2ansible.exe "Path\To\Your_WIF.xlsx"
```
The inventory file (e.g., `inventory_YYYY-MM-DD_HHMM.yml`) will be generated in the current directory.
**Optional: Specify Output File**
```powershell
.\dist\wif2ansible.exe "Path\To\Your_WIF.xlsx" -o "my_inventory.yml"
```
### Option 2: Run via Python Source
If you are developing or prefer running the raw script:
1. **Install Requirements**:
```bash
pip install -r requirements.txt
```
2. **Run Module**:
```bash
python -m wif2ansible.main "Path\To\Your_WIF.xlsx"
```
## Requirements for WIF Excel File
- Must contain a **Servers** tab (e.g., "B.Server Info") with columns for `Reference`, `Platform`, and `IP Address`.
- Must contain **Flow** tabs (e.g., "F.Dataflow - Application") with headers for `Source`, `Destination`, and `Port`.
- **Hidden rows and columns are strictly ignored**. Ensure valid data is visible.
## Output
Generates a YAML file compatible with Ansible:
```yaml
all:
  hosts:
    192.168.1.10:
      ansible_connection: winrm
      flows:
        - dest: 10.0.0.5
          ports: [80, 443]
          protocol: tcp
```

32
debug_headers.py Normal file
View File

@@ -0,0 +1,32 @@
import openpyxl
import sys
# Force stdout to utf-8 if possible, or just replace bad chars on print
sys.stdout.reconfigure(encoding='utf-8')
from wif2ansible.excel_reader import clean_header
def debug(filename, max_rows=29, max_cols=20):
    """Print the cleaned, non-empty cells at the top of every 'application' sheet.

    Intended as a manual aid for checking which row holds the table headers.

    Args:
        filename: Path of the workbook to inspect.
        max_rows: Number of leading rows to print per sheet.
        max_cols: Number of leading columns to scan per row.
    """
    wb = openpyxl.load_workbook(filename, data_only=True)
    for sname in wb.sheetnames:
        if 'application' not in sname.lower():
            continue
        print(f"--- Sheet: {sname} ---")
        sheet = wb[sname]
        for r in range(1, max_rows + 1):
            if sheet.row_dimensions[r].hidden:
                print(f"Row {r}: [HIDDEN]")
                continue
            vals = []
            # The original scanned range(1, 20), i.e. only 19 columns, while
            # its comment promised the first 20.
            for c in range(1, max_cols + 1):
                v = sheet.cell(row=r, column=c).value
                if not v:
                    continue
                try:
                    vals.append(clean_header(v))
                except Exception:
                    # Keep dumping the remaining cells, but do not swallow
                    # KeyboardInterrupt/SystemExit as a bare except would.
                    vals.append("ERROR")
            if vals:
                print(f"Row {r}: {vals}")
if __name__ == "__main__":
    # Exit with a usage hint instead of an IndexError traceback when the
    # workbook path is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: python {sys.argv[0]} <workbook.xlsx>")
    debug(sys.argv[1])

28
pyproject.toml Normal file
View File

@@ -0,0 +1,28 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "wif2ansible"
version = "0.1.0"
description = "Convert WIF Excel documents to Ansible Inventory"
authors = [
{ name="System Admin", email="admin@example.com" },
]
readme = "README.md"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dependencies = [
"openpyxl",
"pyyaml",
]
[project.scripts]
wif2ansible = "wif2ansible.main:main"
[tool.setuptools.packages.find]
where = ["."]

2
requirements.txt Normal file
View File

@@ -0,0 +1,2 @@
openpyxl
pyyaml

4
run.py Normal file
View File

@@ -0,0 +1,4 @@
from wif2ansible.main import main
# Convenience launcher: run the package's command-line entry point when this
# file is executed directly.
if __name__ == '__main__':
main()

0
wif2ansible/__init__.py Normal file
View File

242
wif2ansible/excel_reader.py Normal file
View File

@@ -0,0 +1,242 @@
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
from typing import List, Dict, Tuple, Optional
from .models import Server, Flow
from .parsers import parse_ports, parse_ip, clean_header
from openpyxl.utils import get_column_letter
def is_row_hidden(sheet: Worksheet, row_idx: int) -> bool:
    """Tell whether row ``row_idx`` of ``sheet`` is marked hidden.

    A row without an entry in ``sheet.row_dimensions`` counts as visible.
    """
    row_dim = sheet.row_dimensions.get(row_idx)
    if row_dim is None:
        return False
    return row_dim.hidden
def is_col_hidden(sheet: Worksheet, col_idx: int) -> bool:
    """Tell whether column ``col_idx`` (1-based) of ``sheet`` is hidden.

    openpyxl loads a run of adjacent hidden columns as ONE dimension object,
    keyed by the letter of the first column and carrying ``min``/``max``
    bounds. Looking up only the column's own letter therefore reports every
    column after the first one in such a run as visible. This version also
    checks the ``min``..``max`` span of every hidden dimension.
    """
    letter = get_column_letter(col_idx)
    own_dim = sheet.column_dimensions.get(letter)
    if own_dim is not None and own_dim.hidden:
        return True
    for dim in sheet.column_dimensions.values():
        if not dim.hidden:
            continue
        lo, hi = dim.min, dim.max
        # min/max may be unset on dimensions created programmatically.
        if lo is not None and hi is not None and lo <= col_idx <= hi:
            return True
    return False
def find_header_row(sheet: Worksheet, keywords: List[str]) -> Tuple[Optional[int], Dict[str, int]]:
    """Locate the header row among the first 20 rows of ``sheet``.

    For every visible row, each keyword is mapped to the 1-based index of the
    first column whose cleaned text contains it (hidden columns read as "").
    The row matching the most keywords wins; on a tie the earliest row is kept.

    Returns:
        ``(row_index, {keyword: column_index})`` when the winning row matched
        at least two keywords, otherwise ``(None, {})``.
    """
    winner_row = None
    winner_map = {}
    top_score = 0

    for row_no in range(1, 21):
        if is_row_hidden(sheet, row_no):
            continue

        headers = [
            "" if is_col_hidden(sheet, col_no)
            else clean_header(sheet.cell(row=row_no, column=col_no).value)
            for col_no in range(1, sheet.max_column + 1)
        ]

        hits = {}
        for keyword in keywords:
            # Substring match; only the left-most matching column is recorded.
            column = next(
                (pos for pos, text in enumerate(headers, start=1) if keyword in text),
                None,
            )
            if column is not None:
                hits[keyword] = column

        if len(hits) > top_score:
            top_score = len(hits)
            winner_row = row_no
            winner_map = hits

    # A single keyword hit is too weak to call the row a header.
    if top_score >= 2:
        return winner_row, winner_map
    return None, {}
def read_servers(filename: str) -> Dict[str, Server]:
    """Load the server table from the first sheet whose name contains 'server'.

    Hidden rows, rows without a reference and rows whose reference is
    'example' (any case) are skipped. The server's IP is the first address
    returned by ``parse_ip`` for the 'ip address' cell, or None.

    Returns:
        Mapping of server reference -> ``Server``; a later row with the same
        reference replaces an earlier one. Empty when the sheet or its header
        row cannot be found.
    """
    workbook = openpyxl.load_workbook(filename, data_only=True)

    server_sheet = next(
        (workbook[name] for name in workbook.sheetnames if 'server' in name.lower()),
        None,
    )
    if not server_sheet:
        print("Warning: No 'Servers' sheet found.")
        return {}

    header_row_idx, col_map = find_header_row(
        server_sheet, ['reference', 'platform', 'ip address']
    )
    if not header_row_idx:
        print("Error: Could not find Server table headers.")
        return {}

    ref_col = col_map.get('reference')
    plat_col = col_map.get('platform')
    ip_col = col_map.get('ip address')

    def cell_text(row_no, col_no):
        # A keyword that was not found in the header has no column to read.
        if not col_no:
            return None
        raw = server_sheet.cell(row=row_no, column=col_no).value
        return str(raw).strip() if raw else None

    inventory = {}
    for row_no in range(header_row_idx + 1, server_sheet.max_row + 1):
        if is_row_hidden(server_sheet, row_no):
            print(f"Skipping hidden server row {row_no}")
            continue

        reference = cell_text(row_no, ref_col)
        if not reference or reference.lower() == 'example':
            continue

        platform = cell_text(row_no, plat_col) or 'unknown'
        raw_ip = cell_text(row_no, ip_col)
        addresses = parse_ip(raw_ip) if raw_ip else []

        inventory[reference] = Server(
            reference=reference,
            hostname=reference,  # no separate hostname column is read
            platform=platform,
            ip_address=addresses[0] if addresses else None,
        )

    return inventory
def read_flows(filename: str, server_inventory: Dict[str, Server] = None) -> List[Flow]:
    """Collect flows from every sheet whose name contains 'flow'.

    Per sheet, the header row is located and the columns are classified by
    their cleaned header text: source-IP columns contain 'source' and 'ip',
    destination-IP columns contain 'destination' and 'ip', port columns
    contain 'port', and the flow-id column contains 'flow' and '#'.
    Every visible data row yielding at least one source IP, one destination
    IP and one port produces one ``Flow`` per (source, destination) pair.

    Args:
        filename: Path of the workbook.
        server_inventory: Accepted for interface compatibility; not used here.

    Returns:
        All flows found, in sheet order then row order.
    """
    workbook = openpyxl.load_workbook(filename, data_only=True)
    collected = []

    for sheet_name in workbook.sheetnames:
        if 'flow' not in sheet_name.lower():
            continue

        sheet = workbook[sheet_name]
        print(f"Processing sheet: {sheet_name}")

        # Only the row index is used; the keyword map may point at non-IP
        # columns, so columns are classified again below.
        header_row_idx, _keyword_map = find_header_row(
            sheet, ['source', 'destination', 'port', 'ip']
        )
        if not header_row_idx:
            print(f"Warning: Could not find headers in {sheet_name}")
            continue

        headers = [
            "" if is_col_hidden(sheet, col_no)
            else clean_header(sheet.cell(row=header_row_idx, column=col_no).value)
            for col_no in range(1, sheet.max_column + 1)
        ]

        def columns_where(*needles):
            # 1-based indices of header cells containing every needle.
            return [
                pos for pos, text in enumerate(headers, start=1)
                if all(needle in text for needle in needles)
            ]

        src_cols = columns_where('source', 'ip')
        dst_cols = columns_where('destination', 'ip')
        port_cols = columns_where('port')
        flow_id_cols = columns_where('flow', '#')

        if not (src_cols and dst_cols and port_cols):
            print(f"Skipping {sheet_name}: Missing essential IP/Port columns.")
            continue

        def cell_text(row_no, col_no):
            raw = sheet.cell(row=row_no, column=col_no).value
            return str(raw).strip() if raw else None

        def harvest(row_no, columns, parser):
            # Run ``parser`` over every non-empty cell and pool the results.
            pooled = []
            for col_no in columns:
                text = cell_text(row_no, col_no)
                if text:
                    pooled.extend(parser(text))
            return pooled

        for row_no in range(header_row_idx + 1, sheet.max_row + 1):
            if is_row_hidden(sheet, row_no):
                continue

            flow_id = "unknown"
            if flow_id_cols:
                flow_id = cell_text(row_no, flow_id_cols[0]) or "unknown"

            src_ips = harvest(row_no, src_cols, parse_ip)
            dst_ips = harvest(row_no, dst_cols, parse_ip)
            ports = harvest(row_no, port_cols, parse_ports)

            if not (src_ips and dst_ips and ports):
                continue

            # Each Flow receives its own de-duplicated, sorted port list.
            collected.extend(
                Flow(
                    flow_id=flow_id,
                    source_ip=src,
                    destination_ip=dst,
                    ports=sorted(set(ports)),
                )
                for src in src_ips
                for dst in dst_ips
            )

    return collected

73
wif2ansible/inventory.py Normal file
View File

@@ -0,0 +1,73 @@
from typing import List, Dict, Any
from .models import Server, Flow
def generate_inventory(servers: Dict[str, Server], flows: List[Flow]) -> Dict[str, Any]:
    """Build the Ansible inventory dictionary from servers and flows.

    A flow is kept only when its source IP equals the ``ip_address`` of a
    known server; other flows are dropped, and the first five drops are
    printed. Hosts are keyed by the server's IP and carry the server's
    Ansible variables plus a ``flows`` list.

    Args:
        servers: Mapping of reference -> Server.
        flows: Flows to attach to their source hosts.

    Returns:
        ``{'all': {'hosts': {ip: {..., 'flows': [...]}}}}``
    """
    # Servers without an IP cannot be matched to a flow; a duplicate IP keeps
    # the server seen last.
    by_ip = {srv.ip_address: srv for srv in servers.values() if srv.ip_address}

    hosts = {}
    matched = 0
    dropped = 0

    for flow in flows:
        owner = by_ip.get(flow.source_ip)
        if not owner:
            dropped += 1
            if dropped <= 5:  # keep the console readable
                print(f"Dropping flow {flow.flow_id}: Source {flow.source_ip} not found in Servers tab.")
            continue

        matched += 1
        key = owner.ip_address
        if key not in hosts:
            host_vars = owner.get_ansible_vars()
            host_vars['flows'] = []
            hosts[key] = host_vars

        # Duplicate entries are not filtered out.
        hosts[key]['flows'].append(
            {
                'flow_id': flow.flow_id,
                'dest': flow.destination_ip,
                'ports': flow.ports,
                'protocol': flow.protocol,
            }
        )

    print(f"Inventory Generation Report: Matches={matched}, Dropped={dropped}")
    return {'all': {'hosts': hosts}}

39
wif2ansible/main.py Normal file
View File

@@ -0,0 +1,39 @@
import sys
import yaml
import argparse
from datetime import datetime
from .excel_reader import read_servers, read_flows
from .inventory import generate_inventory
def main(argv=None):
    """Command-line entry point: convert a WIF workbook to an inventory file.

    Reads the servers and flows from the workbook, builds the inventory and
    writes it as YAML to ``--output`` or, by default, to
    ``inventory_<YYYY-MM-DD_HHMM>.yml`` in the current directory.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. Leave as
            None for normal command-line use; passing a list lets the
            function be driven from other code.
    """
    parser = argparse.ArgumentParser(description="Convert WIF Excel to Ansible Inventory")
    parser.add_argument("wif_file", help="Path to the WIF Excel file (.xlsx)")
    parser.add_argument("--output", "-o", help="Output YAML file path", default=None)
    args = parser.parse_args(argv)

    print(f"Reading servers from {args.wif_file}...")
    servers = read_servers(args.wif_file)
    print(f"Found {len(servers)} servers in allowlist.")

    print("Reading flows...")
    flows = read_flows(args.wif_file, servers)
    print(f"Found {len(flows)} raw flows.")

    print("Generating inventory...")
    inventory = generate_inventory(servers, flows)

    # Determine output filename
    if args.output:
        outfile = args.output
    else:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H%M")
        outfile = f"inventory_{timestamp}.yml"

    # Explicit encoding so the file does not depend on the platform default.
    with open(outfile, 'w', encoding='utf-8') as f:
        yaml.dump(inventory, f, default_flow_style=False)
    print(f"Successfully wrote inventory to {outfile}")
# Allows running the module directly, e.g. `python -m wif2ansible.main <file>`.
if __name__ == "__main__":
main()

36
wif2ansible/models.py Normal file
View File

@@ -0,0 +1,36 @@
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
@dataclass
class Server:
    """One server row of the WIF, as read from the servers sheet."""

    reference: str
    hostname: str  # may simply repeat the reference
    ip_address: Optional[str] = None
    platform: str = 'unknown'  # free text such as 'Windows' or 'Linux'

    def get_ansible_vars(self) -> Dict[str, Any]:
        """Return the connection variables Ansible needs for this platform.

        Platforms whose lower-cased name contains 'win' get WinRM settings;
        every other platform gets an empty dict.
        """
        if 'win' not in self.platform.lower():
            # Linux-like and unknown platforms are left on Ansible's defaults.
            return {}
        return {
            'ansible_connection': 'winrm',
            'ansible_winrm_transport': 'ntlm',
            'ansible_winrm_port': 5985,
            'ansible_winrm_server_cert_validation': 'ignore',
        }
@dataclass
class Flow:
    """A single source -> destination connection and the ports it uses."""

    flow_id: str
    source_ip: str
    destination_ip: str
    ports: List[int]
    protocol: str = 'tcp'

    def __hash__(self):
        # Ports are sorted so that the hash does not depend on their order.
        identity = (
            self.flow_id,
            self.source_ip,
            self.destination_ip,
            tuple(sorted(self.ports)),
            self.protocol,
        )
        return hash(identity)

74
wif2ansible/parsers.py Normal file
View File

@@ -0,0 +1,74 @@
import re
from typing import List
def clean_header(header: str) -> str:
    """Normalise a header cell for keyword matching.

    The value is converted to text, HTML-like tags are replaced by a space,
    every run of whitespace (including line breaks inside the cell) is
    collapsed to one space, and the result is stripped and lower-cased.
    Collapsing whitespace lets a header typed as "IP<newline>Address" match
    the keyword 'ip address'.

    Args:
        header: Raw cell value; any type is accepted.

    Returns:
        The normalised text, or "" for an empty/falsy value.
    """
    if not header:
        return ""
    # Replace tags with a space (not nothing) so "Source<br>IP" keeps its
    # word boundary.
    text = re.sub(r'<[^>]+>', ' ', str(header))
    return re.sub(r'\s+', ' ', text).strip().lower()
# Ports assumed when a cell only says "any"/"all" (list taken from the Ruby tool).
_ANY_PORTS_DEFAULT = (22, 3389, 80, 443, 3306, 5432, 8443, 60000)
# Highest valid TCP/UDP port; also bounds how far a range can be expanded.
_MAX_PORT = 65535


def parse_ports(port_str: str) -> List[int]:
    """Extract port numbers from a free-text cell.

    Accepted forms, separated by commas, semicolons, slashes or whitespace:
    single ports ("443"), ranges ("8000-8010", also written "8000 - 8010"),
    and protocol-decorated ports ("443/tcp"). Ranges are expanded and clamped
    to 1..65535, so an oversized range cannot produce an unbounded list.
    Numbers outside 1..65535 are ignored.

    The words "any" or "all" select a fixed list of common ports, but only
    when the cell names no explicit port and only as whole words (so
    "firewall" does not count).

    Args:
        port_str: Raw cell value; converted with ``str``.

    Returns:
        Sorted unique ports, or the common-port list for "any"/"all".
    """
    if not port_str:
        return []

    s = str(port_str).lower()
    # The protocol is not captured; drop the word so "udp53" still yields 53.
    s = re.sub(r'udp', '', s)
    # Read "8000 - 8010" (and en/em dashes) as one range token.
    s = re.sub(r'\s*[-\u2013\u2014]\s*', '-', s)

    ports = set()
    for part in re.split(r'[,;/\s]+', s):
        if not part:
            continue

        range_match = re.match(r'^(\d+)-(\d+)$', part)
        if range_match:
            start, end = map(int, range_match.groups())
            start = max(start, 1)
            end = min(end, _MAX_PORT)
            # A reversed range ("8010-8000") is ignored.
            if start <= end:
                ports.update(range(start, end + 1))
            continue

        if part.isdecimal() and 1 <= int(part) <= _MAX_PORT:
            ports.add(int(part))

    if not ports and re.search(r'\b(?:any|all)\b', s):
        return list(_ANY_PORTS_DEFAULT)
    return sorted(ports)
def parse_ip(ip_str: str) -> List[str]:
    """Find the IPv4 addresses in a piece of text.

    Addresses are returned in order of first appearance with duplicates
    removed, so callers that take "the first IP" get the same one on every
    run (a ``set`` gave an arbitrary order). Candidates with an octet above
    255 are discarded.

    Args:
        ip_str: Raw cell value; converted with ``str``.

    Returns:
        Unique dotted-quad strings in order of appearance; [] for empty input.
    """
    if not ip_str:
        return []
    candidates = re.findall(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', str(ip_str))
    valid = [
        ip for ip in candidates
        if all(int(octet) <= 255 for octet in ip.split('.'))
    ]
    # dict preserves insertion order, giving a stable de-duplication.
    return list(dict.fromkeys(valid))

417
wif2ansibleinventory.rb Normal file
View File

@@ -0,0 +1,417 @@
#!/usr/bin/ruby
#USAGE NOTES
#bundle exec ruby .\wif2ansible.rb TAS000000535469.xlsx
require 'roo'
require 'yaml'
require 'resolv'
require 'uri'
require 'socket'
# Per-host result of the OS probes: 'win', 'lin', or false when the probe
# failed. Filled in by is_windows? / is_linux?.
OSCACHE = {}
# Hosts that could not be reached or classified; listed at the end of the run.
UNREACHABLE_HOSTS = []
# Core extension: case-insensitive, regex-based lookups by key.
# +find+ is interpolated into a regular expression without escaping.
class Hash
  # Returns a Hash of every pair whose key matches +find+.
  # (Despite the name, all matches are returned, not only the first.)
  def fuzzy_find_first(find)
    select { |key, _value| key.to_s =~ /#{find}/i }
  end

  # Returns the first element of the flattened values of all matching keys.
  def find_first(find)
    find_custom(find).first
  end

  # Returns the flattened values of all matching keys.
  def find_custom(find)
    matching = select { |key, _value| key.to_s =~ /#{find}/i }
    matching.values.flatten
  end
end
# Command-line arguments: WIF workbook path, optional sheet-name filter,
# optional extra switches.
wif_file = ARGV[0]
command_sheet_name = ARGV[1] ? ARGV[1] : "flow app"
portwass_switches = ARGV[2] ? ARGV[2] : "" #optionally add switches to portwas like -n

# Without a workbook path there is nothing to do; say so instead of printing
# the Excel repair guide for a file that was never given.
if wif_file.nil?
  puts "USAGE: bundle exec ruby #{$0} <WIF.xlsx> [sheet name]"
  exit 1
end

begin
  XLSX = Roo::Spreadsheet.open(wif_file, only_visible_sheets: true)
rescue
  excel_fix_guide = %{
1. Delete Sheet: Diagram
2. Delete Sheet: Notes and Exceptions
3. Save as xls
4. Close Excel completely
5. Open xls file
6. Run document inspect, delete hidden rows
7. Save the xls as xlsx}
  puts excel_fix_guide
  # XLSX is undefined at this point; stop here rather than failing later
  # with a NameError on the first use of XLSX.
  exit 1
end
#class Common
# Returns the names of all workbook sheets matching +name+, case-insensitively.
# Each space in +name+ acts as a wildcard, so "flow app" matches
# "F.Dataflow - Application".
def find_sheet_name(name)
  XLSX.sheets.select do |sheet_name|
    sheet_name.scan(/#{name.gsub(' ', '.*')}/i).any?
  end
end
# Strips HTML-like tags and asterisks from a String and tidies the spacing.
# The text between tags is trimmed and re-joined with single spaces, and a
# whitespace character before a comma is removed. Non-String values are
# returned untouched.
def remove_html(string)
  return string unless string.class == String

  fragments = string.split(/\<.*?\>/).map(&:strip).reject(&:empty?)
  fragments.join(' ').gsub(/\s,/,',').gsub('*', '').strip
end
# Decides whether +servername+ is a Windows host by trying to open a TCP
# connection to port 3389. Results are remembered in OSCACHE.
# Returns true only when the connection to port 3389 succeeds.
# NOTE(review): the sockets opened here are never closed in this method.
def is_windows?(servername)
# Answer from the cache when the host has already been classified.
if OSCACHE[servername] == 'win'
return true
elsif OSCACHE[servername] == 'lin'
return false
#elsif OSCACHE[servername] == false
# puts "Timeout for #{servername}, skipping..."
# return false
end
attempts = 0
begin
attempts+=1
s = TCPSocket.new servername, 3389
OSCACHE[servername] = 'win' if s
return true if s
rescue Errno::ECONNREFUSED
# Port 3389 refused: not Windows. Probe port 22 so the cache can record
# 'lin'; if that probe raises, the host is cached as false and recorded
# as unreachable.
puts "#{servername}: Port 3389 not open, #{servername} is not a windows server"
s = nil
begin; s = TCPSocket.new servername, 22; rescue;OSCACHE[servername] = false; UNREACHABLE_HOSTS << servername;end;
OSCACHE[servername] = 'lin' if s
return false
rescue IO::TimeoutError
# Re-run the begin block until three attempts have been made, then give up.
retry unless attempts > 2
puts "#{servername}: IO Timeout to #{servername}. You may not be connected to the correct EDC. Please connect your VPN or run from a JUMP server in the correct EDC"
OSCACHE[servername] = false
return false
end
end
# Decides whether +servername+ is a Linux host by trying to open a TCP
# connection to port 22. Results are remembered in OSCACHE.
# Returns true only when the connection to port 22 succeeds.
# NOTE(review): the sockets opened here are never closed in this method.
def is_linux?(servername)
# Answer from the cache when the host has already been classified.
if OSCACHE[servername] == 'lin'
return true
elsif OSCACHE[servername] == 'win'
return false
#elsif OSCACHE[servername] == false
# puts "Timeout for #{servername}, skipping..."
# return false
end
attempts = 0
begin
attempts+=1
s = TCPSocket.new servername, 22
OSCACHE[servername] = 'lin'
return true if s
rescue Errno::ECONNREFUSED
# Port 22 refused: not Linux. Probe port 3389 so the cache can record
# 'win'; if that probe raises, the host is cached as false and recorded
# as unreachable.
puts "#{servername}: Port 22 not open, #{servername} is not a linux server"
s = nil
begin; s = TCPSocket.new servername, 3389; rescue;OSCACHE[servername] = false; UNREACHABLE_HOSTS << servername;end;
OSCACHE[servername] = 'win' if s
return false
rescue IO::TimeoutError
# Re-run the begin block until three attempts have been made, then give up.
retry unless attempts > 2
puts "#{servername}: IO Timeout to #{servername}. You may not be connected to the correct EDC. Please connect your VPN or run from a JUMP server in the correct EDC"
OSCACHE[servername] = false
return false
end
end
# Finds the first cell of +row+ whose header matches +column+
# (case-insensitive; spaces in +column+ act as wildcards; newlines in the
# header are ignored) and whose header and value are both non-nil.
# Returns [header, values-as-array] for that cell. When nothing matches,
# the row itself is returned (the result of #each).
def select_value_from_row(row, column)
  row.each do |header, value|
    next if value.nil? || header.nil?
    next unless header.gsub("\n", '').scan(/#{column.gsub(' ', '.*')}/i).any?

    return [header, cell_value_to_array(value)]
  end
end
# Splits a cell value into its items. The value is stringified, cut at
# newlines, commas, spaces and double quotes, and blank pieces are dropped.
def cell_value_to_array(value)
  pieces = value.to_s.split(/[\n, " ", ","]/).compact
  pieces.reject { |piece| piece.gsub(' ', '') == "" }
end
#end #Common
#class Flow
# True when the row has five or more nil cells, or when its first cell is a
# String or nil.
# NOTE: a method with this same name is defined again further down the
# file; that later definition replaces this one at load time.
def is_empty_or_example_flow_row?(row)
  nil_count_limit = 5
  blank_cells = row.count { |_header, value| value.nil? }
  first_value = row.first[1]

  blank_cells >= nil_count_limit || first_value.class == String || first_value.nil?
end
# Parses the flow sheet matching +name+ and returns the result of Roo's
# parse with a header search.
# The LAST matching sheet is tried first; if its header row cannot be found,
# the FIRST matching sheet is tried. If that fails as well, an explanation
# is printed and the script exits.
def get_all_rows_and_find_headers(name)
# Every one of these patterns is passed to Roo's header_search.
flow_header_items = [/flow/i, /source/i, /destination/i, /public/i, /ip/i, /private/i, /port/i]
begin
sheet_name = find_sheet_name(name).last
puts "Using sheet: #{sheet_name}"
XLSX.sheet(sheet_name).parse(header_search: flow_header_items)
rescue Roo::HeaderRowNotFoundError
# Fall back to the first matching sheet.
sheet_name = find_sheet_name(name).first
puts "ERROR: POSSIBLE EXTRA SHEET, trying to fix...\nUsing sheet: #{sheet_name}"
begin
XLSX.sheet(sheet_name).parse(header_search: flow_header_items)
rescue Roo::HeaderRowNotFoundError
puts "ERROR: Flow sheet table header names are incorrect in provided WIF. This script is looking for the following words: #{flow_header_items}. Fix this in Excel and use your modified WIF file."
puts "Specifically, I want to see 'Source Public IP' 'Source Private IP' 'Destination Public IP' 'Destination Private IP'. Add columns if they have been deleted."
exit
end
end
end
# Returns +value+ as a String with every UDP port removed, i.e. every run of
# 2-5 digits followed by one character and "udp" (any case).
def remove_udp_ports(value)
  text = value.to_s
  text.gsub(/\d{2,5}.{1}udp/i, '')
end
# Extracts TCP port numbers from a cell value.
# UDP ports are removed first, then every run of 2-5 digits is read as a
# port. When no port is found but the text contains "any" or "all", a list
# of frequently used ports is returned instead. Ranges are not expanded;
# both ends of "8000-8010" are simply read as two ports.
def parse_ports(value)
  text = remove_udp_ports(value)
  port_numbers = text.scan(/\d{2,5}/).map { |port| port.to_i }
  wants_everything = text.scan(/any|all/i).any?

  if wants_everything && port_numbers.empty?
    [22,3389,80,443,3306,5432,8443,60000] # frequently used ports for an all/any request
  else
    port_numbers
  end
end
# True when the row cannot be used as a flow: it has no source IP (private
# or public), no destination IP (private or public), or its first cell is a
# String or nil.
def is_empty_or_example_flow_row?(row)
  source_ips = ['source private ip', 'source public ip'].map do |column|
    select_value_from_row(row, column)[1]
  end.flatten.compact

  destination_ips = ['destination private ip', 'destination public ip'].map do |column|
    select_value_from_row(row, column)[1]
  end.flatten.compact

  first_value = row.first[1]
  source_ips.none? || destination_ips.none? || first_value.class == String || first_value.nil?
end
#only flows that contain source ip and destination ip
# Returns the rows of the flow sheet matching +sheet_name+, minus empty and
# example rows. On a TypeError an error is printed and the script exits.
def testable_flow_rows(sheet_name)
  rows = get_all_rows_and_find_headers(sheet_name)
  rows.delete_if { |row| is_empty_or_example_flow_row?(row) }
rescue TypeError
  puts "ERROR: Problem accessing sheet with '#{sheet_name}' in the name. Does this sheet exist?"
  exit
end
# Normalises parsed flow rows into hashes with tidy keys and values.
# Keys: HTML removed, spaces and newlines replaced by underscores.
# Values, by first matching rule:
#   * key contains "port"    -> list of port numbers
#   * value is an Integer     -> kept unchanged
#   * value contains an IPv4  -> list of the IPv4 addresses found
#   * anything else           -> list of the cell's items
def cleanup_flow_formatting(rows)
  ipv4 = /\b[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b/

  rows.map do |row|
    row.each_with_object({}) do |(header, cell), cleaned|
      key = remove_html(header).to_s.gsub(" ", "_").gsub("\n", "_")
      cleaned[key] =
        if key.scan(/port/i).any?
          parse_ports(remove_html(cell))
        elsif cell.class == Integer
          cell
        elsif cell.to_s.scan(ipv4).any?
          cell_value_to_array(remove_html(cell)).to_s.scan(ipv4)
        else
          cell_value_to_array(remove_html(cell))
        end
    end
  end
end
#end #Flow
#class Server
# True when a server row should be ignored: it has no CPU value, no RAM
# value, or it is the template's "Example" row.
#
# select_value_from_row returns the cell's items as an Array, so the
# reference has to be compared through its first item; comparing the Array
# itself with the String "Example" was never true and let the example row
# through.
def is_empty_or_example_server_row?(row)
  return true if select_value_from_row(row, 'cpu')[1].nil?
  return true if select_value_from_row(row, 'ram')[1].nil?

  reference = select_value_from_row(row, 'server reference')[1]
  Array(reference).first == "Example"
end
# Returns the column headers of the first sheet whose name matches "server",
# taken from the keys of the first parsed row.
def find_header_in_server_sheet
  sheet_name = find_sheet_name('server').first
  puts "Using sheet: #{sheet_name}"
  header_patterns = [/reference/i, /type/i, /alias/i, /platform/i, /middleware/i ]
  first_row = XLSX.sheet(sheet_name).parse(header_search: header_patterns).first
  first_row.map { |header, _value| header }
end
# Returns every usable row of the first sheet whose name matches "server";
# empty and example rows are removed.
def all_servers
  sheet_name = find_sheet_name('server').first
  header_patterns = [/reference/i, /type/i, /alias/i, /platform/i, /middleware/i ]
  rows = XLSX.sheet(sheet_name).parse(header_search: header_patterns)
  rows.delete_if { |row| is_empty_or_example_server_row?(row) }
end
# Looks up the IP address cells of the first server whose reference matches
# +server_reference+ (case-insensitive regex, not escaped).
# Returns the sorted non-nil values of that row's "ip ... address" columns,
# or [] when no server matches.
def reference_to_ip(server_reference)
  matching = all_servers.select do |row|
    this_reference = select_value_from_row(row, 'reference')
    this_reference[1][0].scan(/#{server_reference}/i).any?
  end
  return [] if matching.empty?

  ip_cells = matching.first.keep_if do |header, value|
    header.scan(/ip.*address/i).any? if !header.nil? and !value.nil?
  end
  ip_cells.map { |_header, value| value }.sort!
end
# Resolves the management DNS name for +name+.
# The short host name from a reverse lookup is combined with ".ds.gc.ca"
# and resolved; if resolution fails, ".pre-ds.gc.ca" is tried instead.
# Returns the management DNS NAME (not the address) when an address longer
# than 4 characters was resolved; otherwise returns nil.
def to_mgt_ip(name)
begin
fqdn = Resolv.getname(name)
mgt_dns = fqdn.split('.').first + '.ds.gc.ca'
mgt_ip = Resolv.getaddress(mgt_dns)
rescue Resolv::ResolvError
begin
puts "#{name} not found in ds.gc.ca, checking pre-ds.gc.ca..."
# NOTE(review): this repeats the reverse lookup; if that lookup was what
# failed above, it presumably fails again here.
fqdn = Resolv.getname(name)
mgt_dns = fqdn.split('.').first + '.pre-ds.gc.ca'
mgt_ip = Resolv.getaddress(mgt_dns)
rescue Resolv::ResolvError
puts "#{name} is not a server OR no DNS entries exist in ds.gc.ca or pre-ds.gc.ca, skipping source..."
end
end
#return mgt_ip
# The resolved address is only used as evidence that the name exists.
return mgt_dns if mgt_ip.to_s.length > 4
end
# Wraps the per-host data in the Ansible inventory structure
# (all -> hosts -> host) and returns it as a YAML string.
def flows_by_host_to_ansible_inventory_yaml(flows_by_host)
  inventory = { "all" => { "hosts" => flows_by_host } }
  inventory.to_yaml
end
#class Result
# Extracts [port, status] pairs from tool output: every line starting with
# 2-5 digits, a colon and a space yields the digits and the next four word
# characters.
def parse_portwass(stdout)
  result_line = /^(\d{2,5}):\ (\w{4})/
  stdout.scan(result_line)
end
#end
# Inventory under construction: management host name => host variables,
# including its "flows" list.
ansible_inventory_hash ={}
# NOTE(review): ansible_tasks is not referenced again in the code shown here.
ansible_tasks = []
flows = []
# An explicit sheet-name filter on the command line is loaded first; failing
# to parse it ends the run.
if ARGV[1]
begin
flows = cleanup_flow_formatting(testable_flow_rows(ARGV[1]))
rescue
puts "INFO: unable to parse sheet containing '#{ARGV[1]}'"
exit
end
end
# The application and management flow sheets are always attempted; a failure
# here is reported and skipped.
begin
flows << cleanup_flow_formatting(testable_flow_rows('flow app'))
rescue
puts "INFO: unable to parse sheet containing 'flow app'"
end
begin
flows << cleanup_flow_formatting(testable_flow_rows('flow man'))
rescue
puts "INFO: unable to parse sheet containing 'flow man'"
end
# Each << above appended a whole array of rows; flatten to one list of rows.
flows.flatten!
results = {}
mgt_ip_list = []
failed_portwass_cmds = []
flows_count = 0
# Main loop: for every flow, resolve each source to its management host,
# register the host in the inventory (classified as Linux or Windows) and
# attach one entry per destination IP.
flows.each do |flow|
  puts "\n\n#{'#'*8} Parsing flow number: #{flow['Flow_#']} #{'#'*8}"
  if results[flow['Flow_#']].nil?
    results[flow['Flow_#']] = {}
    results[flow['Flow_#']]['connections'] = []
  end

  # All four IP columns must exist. The original tested Source_Public_IP
  # twice and never Destination_Public_IP, so a missing destination column
  # crashed further down instead of producing this message.
  if flow["Source_Public_IP"].nil? || flow["Source_Private_IP"].nil? || flow["Destination_Private_IP"].nil? || flow["Destination_Public_IP"].nil?
    puts "ERROR IN SPREADSHEET:\n\n Please ensure there are columns named (case sensitive) \"Source Private IP\", \"Source Public IP\", \"Destination Private IP\", \"Destination Public IP\"\n\n Please update the names of columns and possibly add empty columns with these names if they have been combined."
    exit
  elsif flow["Flow_#"].nil?
    puts "ERROR IN SPREADSHEET:\n\n Please ensure the Flow # column is named (case sensitive) \"Flow #\""
    exit
  end

  # Public IPs take precedence over private ones when both are present.
  flow_src_ips = flow["Source_Public_IP"].any? ? flow["Source_Public_IP"] : flow["Source_Private_IP"]
  flow_src_ips.each do |src_ip|
    mgt_ip = to_mgt_ip(src_ip)
    mgt_ip_list << mgt_ip
    flow_dst_ips = flow["Destination_Public_IP"].any? ? flow["Destination_Public_IP"] : flow["Destination_Private_IP"]

    # `puts "..." && next` ran `next` before `puts`, so the message was
    # never shown; print first, then skip.
    if flow_dst_ips.nil? || !flow_dst_ips.any?
      puts "Destination IPs empty for flow #{flow['Flow_#']}, skipping"
      next
    end

    flow_dst_ips.each do |dst_ip|
      if mgt_ip.to_s.length < 3
        puts "skipping #{mgt_ip} #{src_ip} as I don't think it's a windows/linux server"
        next
      end

      if ansible_inventory_hash[mgt_ip].nil?
        if is_linux? mgt_ip
          ansible_inventory_hash.merge!({mgt_ip => {"flows" => []} })
        elsif is_windows? mgt_ip
          ansible_inventory_hash.merge!({mgt_ip => {"ansible_connection" => "winrm", "ansible_winrm_transport" => "ntlm", "ansible_winrm_port" => 5985, "flows" => []} })
        else
          # add to list of unreachable hosts to output at end
          UNREACHABLE_HOSTS << mgt_ip
          UNREACHABLE_HOSTS << src_ip
        end
      end

      begin
        a ={ "flow_number" => flow['Flow_#'], "dest" => dst_ip, "ports" => [flow.find_custom("Port")].flatten.uniq }
        ansible_inventory_hash[mgt_ip]["flows"] << a
        puts "#{mgt_ip} : #{a}"
        flows_count +=1
      rescue NoMethodError
        # Raised when the host was not added above (unreachable host).
        puts "SKIPPING ERROR: #{mgt_ip} flow #{flow['Flow_#']}"
      end
    end#dst
  end#src
end#flows
# Show the inventory both as a Ruby hash and as YAML before writing it out.
puts ansible_inventory_hash
puts flows_by_host_to_ansible_inventory_yaml(ansible_inventory_hash)
# File name: at most the first 23 characters of the workbook's base name
# (with a trailing "xlsx" removed), then "_inventory_" and a
# day-month-year_hour.minute timestamp.
ansible_inventory_filename = "#{File.basename(wif_file, "xlsx")[0..22]}_inventory_#{Time.now.strftime("%d-%m-%Y_%H.%M")}.yml"
File.open ansible_inventory_filename, 'w' do |file|
file.write flows_by_host_to_ansible_inventory_yaml(ansible_inventory_hash)
end
# Run summary.
puts "="*20
puts "Source servers found: #{ansible_inventory_hash.count}"
puts "Total connections: #{flows_count}"
puts "Generated inventory: #{ansible_inventory_filename}"
puts "="*20
# List each unreachable host once, ignoring nil entries.
if UNREACHABLE_HOSTS.compact.uniq.any?
puts "The following [#{UNREACHABLE_HOSTS.compact.uniq.count}] servers could not be reached. Either they are in another datacentre or arent windows/linux servers:"
puts UNREACHABLE_HOSTS.compact.uniq.join("\n")
end