Initial commit of wif2ansible

2026-02-06 15:12:49 -05:00
commit aa299df41e
13 changed files with 1025 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,20 @@
+# Excel Files (Data)
+*.xls
+*.xlsx
+
+# Generated Inventories (Contain Sensitive IPs)
+inventory_*.yml
+*.yml
+
+# Python Build Artifacts
+dist/
+build/
+*.spec
+__pycache__/
+*.pyc
+*.egg-info/
+.pytest_cache/
+
+# IDE settings
+.vscode/
+.idea/
--- a/README.md
+++ b/README.md
@@ -0,0 +1,58 @@
+# WIF to Ansible Inventory Converter
+
+This tool converts **Workload Intake Form (WIF)** Excel documents into **Ansible Inventory** YAML files.
+
+## Features
+- **Robust Excel Parsing**: Automatically handles shifted cells and ignores hidden headers/rows/columns.
+- **Server Filtering**: Only generates flows for servers explicitly defined in the 'Servers' tab.
+- **Strict Validation**: Dropped flows are reported in the console output.
+
+## How to Run
+
+### Option 1: Standalone Executable (Windows)
+No Python installation required.
+
+1.  Open a terminal in the project root (the folder that contains `dist`).
+2.  Run the executable from the command line, providing your WIF Excel file as an argument:
+
+```powershell
+.\dist\wif2ansible.exe "Path\To\Your_WIF.xlsx"
+```
+
+The inventory file (e.g., `inventory_YYYY-MM-DD_HHMM.yml`) will be generated in the current directory.
+
+**Optional: Specify Output File**
+```powershell
+.\dist\wif2ansible.exe "Path\To\Your_WIF.xlsx" -o "my_inventory.yml"
+```
+
+### Option 2: Run via Python Source
+If you are developing or prefer running the raw script:
+
+1.  **Install Requirements**:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+2.  **Run Module**:
+    ```bash
+    python -m wif2ansible.main "Path\To\Your_WIF.xlsx"
+    ```
+
+## Requirements for WIF Excel File
+- Must contain a **Servers** tab (e.g., "B.Server Info") with columns for `Reference`, `Platform`, and `IP Address`.
+- Must contain **Flow** tabs (e.g., "F.Dataflow - Application") with headers for `Source`, `Destination`, and `Port`.
+- **Hidden rows and columns are strictly ignored**. Ensure valid data is visible.
+
+## Output
+Generates a YAML file compatible with Ansible:
+```yaml
+all:
+  hosts:
+    192.168.1.10:
+      ansible_connection: winrm
+      flows:
+        - dest: 10.0.0.5
+          ports: [80, 443]
+          protocol: tcp
+```
--- a/debug_headers.py
+++ b/debug_headers.py
@@ -0,0 +1,32 @@
+import openpyxl
+import sys
+# Force stdout to UTF-8 so non-ASCII cell text can be printed without encoding errors
+sys.stdout.reconfigure(encoding='utf-8')
+
+from wif2ansible.excel_reader import clean_header
+
+def debug(filename, max_rows=30, max_cols=20):
+    """
+    Print candidate header rows for every sheet whose name contains 'application'.
+
+    For each such sheet the first ``max_rows`` rows are scanned. Hidden rows are
+    reported as ``[HIDDEN]``; for visible rows the cleaned (``clean_header``)
+    truthy values found in the first ``max_cols`` columns are printed.
+    Rows without any value print nothing.
+    """
+    wb = openpyxl.load_workbook(filename, data_only=True)
+    for sname in wb.sheetnames:
+        if 'application' not in sname.lower():
+            continue
+        print(f"--- Sheet: {sname} ---")
+        sheet = wb[sname]
+        # range() excludes its stop value, hence the +1 to really cover the requested span
+        for r in range(1, max_rows + 1):
+            if sheet.row_dimensions[r].hidden:
+                print(f"Row {r}: [HIDDEN]")
+                continue
+
+            vals = []
+            for c in range(1, max_cols + 1):
+                v = sheet.cell(row=r, column=c).value
+                if not v:
+                    continue
+                try:
+                    vals.append(clean_header(v))
+                except Exception:
+                    # One unprintable cell must not abort the dump, but unlike a bare
+                    # ``except:`` this lets KeyboardInterrupt/SystemExit through.
+                    vals.append("ERROR")
+            if vals:
+                print(f"Row {r}: {vals}")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit(f"Usage: {sys.argv[0]} <wif_file.xlsx>")
+    debug(sys.argv[1])
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,28 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "wif2ansible"
+version = "0.1.0"
+description = "Convert WIF Excel documents to Ansible Inventory"
+authors = [
+  { name="System Admin", email="admin@example.com" },
+]
+readme = "README.md"
+requires-python = ">=3.7"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "openpyxl",
+    "pyyaml",
+]
+
+[project.scripts]
+wif2ansible = "wif2ansible.main:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+openpyxl
+pyyaml
--- a/run.py
+++ b/run.py
@@ -0,0 +1,4 @@
+from wif2ansible.main import main
+
+if __name__ == '__main__':
+    main()
--- a/wif2ansible/__init__.py
+++ b/wif2ansible/__init__.py
--- a/wif2ansible/excel_reader.py
+++ b/wif2ansible/excel_reader.py
@@ -0,0 +1,242 @@
+import openpyxl
+from openpyxl.worksheet.worksheet import Worksheet
+from typing import List, Dict, Tuple, Optional
+from .models import Server, Flow
+from .parsers import parse_ports, parse_ip, clean_header
+
+from openpyxl.utils import get_column_letter
+
+def is_row_hidden(sheet: Worksheet, row_idx: int) -> bool:
+    """Tell whether row ``row_idx`` has a dimension entry flagged as hidden."""
+    row_dim = sheet.row_dimensions.get(row_idx)
+    if row_dim is None:
+        # No dimension record for this row: it cannot be hidden.
+        return False
+    return row_dim.hidden
+
+def is_col_hidden(sheet: Worksheet, col_idx: int) -> bool:
+    """Tell whether the column with 1-based index ``col_idx`` is flagged as hidden."""
+    # Column dimensions are keyed by letter ('A', 'B', ...), not by number.
+    col_dim = sheet.column_dimensions.get(get_column_letter(col_idx))
+    if col_dim is None:
+        return False
+    return col_dim.hidden
+
+def find_header_row(sheet: Worksheet, keywords: List[str]) -> Tuple[Optional[int], Dict[str, int]]:
+    """
+    Locate the most header-like row among rows 1-20 of ``sheet``.
+
+    A row scores one point per keyword that occurs as a substring of any of its
+    cleaned cell texts; hidden rows are skipped and hidden columns count as empty.
+    The earliest row with the highest score wins, provided it scored at least 2.
+
+    Returns ``(row_index, {keyword: 1-based column of its first match})``,
+    or ``(None, {})`` when no row reaches the threshold.
+    """
+    top_score = 0
+    top_row = None
+    top_columns = {}
+
+    for row_no in range(1, 21):
+        if is_row_hidden(sheet, row_no):
+            continue
+
+        # Hidden columns contribute "" so list positions stay aligned with column numbers.
+        texts = [
+            "" if is_col_hidden(sheet, col_no)
+            else clean_header(sheet.cell(row=row_no, column=col_no).value)
+            for col_no in range(1, sheet.max_column + 1)
+        ]
+
+        columns = {}
+        for keyword in keywords:
+            first_hit = next(
+                (pos for pos, text in enumerate(texts, start=1) if keyword in text),
+                None,
+            )
+            if first_hit is not None:
+                columns[keyword] = first_hit
+
+        # Strict '>' keeps the earliest row when several rows tie.
+        if len(columns) > top_score:
+            top_score = len(columns)
+            top_row = row_no
+            top_columns = columns
+
+    if top_score < 2:
+        return None, {}
+    return top_row, top_columns
+
+def read_servers(filename: str) -> Dict[str, Server]:
+    """
+    Read the server allowlist from the first sheet whose name contains 'server'.
+
+    The header row is located with ``find_header_row`` using the keywords
+    'reference', 'platform' and 'ip address'. Hidden rows and rows whose
+    reference is empty or 'example' are skipped.
+
+    Returns a dict keyed by the server's Reference value. Each ``Server`` uses
+    the reference as hostname, 'unknown' as platform when the cell is empty,
+    and the first IPv4 address found in the IP cell (or ``None``).
+    An empty dict is returned, with a console message, when the sheet, the
+    header row or the Reference column cannot be found.
+    """
+    wb = openpyxl.load_workbook(filename, data_only=True)
+
+    target_sheet = next((wb[s] for s in wb.sheetnames if 'server' in s.lower()), None)
+    if target_sheet is None:
+        print("Warning: No 'Servers' sheet found.")
+        return {}
+
+    # Ruby script looked for: reference, type, alias, platform, middleware
+    header_keywords = ['reference', 'platform', 'ip address']
+    header_row_idx, col_map = find_header_row(target_sheet, header_keywords)
+
+    if not header_row_idx:
+        print("Error: Could not find Server table headers.")
+        return {}
+
+    # Column positions never change between rows, so resolve them once.
+    ref_idx = col_map.get('reference')
+    plat_idx = col_map.get('platform')
+    ip_idx = col_map.get('ip address')
+
+    if not ref_idx:
+        # find_header_row accepts a row with any two keywords; without a
+        # Reference column every row would be skipped without explanation.
+        print("Error: Could not find a 'Reference' column in the Server table.")
+        return {}
+
+    def cell_text(row_idx, col_idx):
+        """Return the stripped text of a cell, or None for a missing column / empty cell."""
+        if not col_idx:
+            return None
+        v = target_sheet.cell(row=row_idx, column=col_idx).value
+        return str(v).strip() if v else None
+
+    servers = {}
+    for r in range(header_row_idx + 1, target_sheet.max_row + 1):
+        if is_row_hidden(target_sheet, r):
+            print(f"Skipping hidden server row {r}")
+            continue
+
+        ref = cell_text(r, ref_idx)
+        if not ref or ref.lower() == 'example':
+            continue
+
+        ip_raw = cell_text(r, ip_idx)
+        ips = parse_ip(ip_raw) if ip_raw else []
+
+        if ref in servers:
+            # Last definition wins, as before, but no longer silently.
+            print(f"Warning: duplicate server reference '{ref}' on row {r}; overriding earlier entry.")
+
+        servers[ref] = Server(
+            reference=ref,
+            hostname=ref,  # No separate hostname column is read; fall back to the reference
+            platform=cell_text(r, plat_idx) or 'unknown',
+            ip_address=ips[0] if ips else None,
+        )
+
+    return servers
+
+def _visible_header_texts(sheet: Worksheet, row_idx: int) -> List[str]:
+    """Return the cleaned text of every column of ``row_idx`` ('' for hidden columns)."""
+    return [
+        "" if is_col_hidden(sheet, c) else clean_header(sheet.cell(row=row_idx, column=c).value)
+        for c in range(1, sheet.max_column + 1)
+    ]
+
+
+def _parse_cells(sheet: Worksheet, row_idx: int, col_indices: List[int], parser) -> list:
+    """Apply ``parser`` to each non-empty cell of ``row_idx`` in ``col_indices``; concatenate the results."""
+    collected = []
+    for c in col_indices:
+        raw = sheet.cell(row=row_idx, column=c).value
+        text = str(raw).strip() if raw else None
+        if text:
+            collected.extend(parser(text))
+    return collected
+
+
+def read_flows(filename: str, server_inventory: Optional[Dict[str, Server]] = None) -> List[Flow]:
+    """
+    Read flows from every sheet whose name contains 'flow'.
+
+    Per sheet, the header row is found with ``find_header_row``; the columns
+    used are those whose header contains 'source'+'ip', 'destination'+'ip',
+    'port', and 'flow'+'#'. Sheets lacking a source-IP, destination-IP or port
+    column are skipped with a console message. Hidden rows and rows missing a
+    source IP, destination IP or port are skipped.
+
+    One ``Flow`` is produced per (source IP, destination IP) pair of a row,
+    carrying the row's sorted, de-duplicated ports.
+
+    server_inventory: accepted for API compatibility; not used by this function.
+    """
+    wb = openpyxl.load_workbook(filename, data_only=True)
+
+    flows = []
+
+    for sname in (s for s in wb.sheetnames if 'flow' in s.lower()):
+        sheet = wb[sname]
+        print(f"Processing sheet: {sname}")
+
+        # Keywords based on Ruby script: 'Source Public IP', 'Source Private IP',
+        # 'Destination Public IP', 'Port' - simplified to their common words.
+        header_row_idx, _ = find_header_row(sheet, ['source', 'destination', 'port', 'ip'])
+
+        if not header_row_idx:
+            print(f"Warning: Could not find headers in {sname}")
+            continue
+
+        # find_header_row only reports the *first* column per keyword (which may be
+        # e.g. 'Source Ref'), so re-scan the header row for the specific IP columns.
+        headers = _visible_header_texts(sheet, header_row_idx)
+        src_ip_indices = [i for i, v in enumerate(headers, start=1) if 'source' in v and 'ip' in v]
+        dst_ip_indices = [i for i, v in enumerate(headers, start=1) if 'destination' in v and 'ip' in v]
+        port_indices = [i for i, v in enumerate(headers, start=1) if 'port' in v]
+        flow_id_indices = [i for i, v in enumerate(headers, start=1) if 'flow' in v and '#' in v]  # "Flow #"
+
+        if not src_ip_indices or not dst_ip_indices or not port_indices:
+            print(f"Skipping {sname}: Missing essential IP/Port columns.")
+            continue
+
+        for r in range(header_row_idx + 1, sheet.max_row + 1):
+            if is_row_hidden(sheet, r):
+                continue
+
+            fid = "unknown"
+            if flow_id_indices:
+                raw_id = sheet.cell(row=r, column=flow_id_indices[0]).value
+                fid = (str(raw_id).strip() if raw_id else None) or "unknown"
+
+            # There may be both "Public" and "Private" IP columns: collect from all of
+            # them, dropping repeats (order kept) so one address never yields two flows.
+            src_ips = list(dict.fromkeys(_parse_cells(sheet, r, src_ip_indices, parse_ip)))
+            dst_ips = list(dict.fromkeys(_parse_cells(sheet, r, dst_ip_indices, parse_ip)))
+            ports = _parse_cells(sheet, r, port_indices, parse_ports)
+
+            if not src_ips or not dst_ips or not ports:
+                continue
+
+            unique_ports = sorted(set(ports))
+            # Cartesian product: Source x Dest
+            for s_ip in src_ips:
+                for d_ip in dst_ips:
+                    flows.append(Flow(
+                        flow_id=fid,
+                        source_ip=s_ip,
+                        destination_ip=d_ip,
+                        ports=list(unique_ports),  # fresh list per Flow; flows never share one mutable list
+                    ))
+
+    return flows
--- a/wif2ansible/inventory.py
+++ b/wif2ansible/inventory.py
@@ -0,0 +1,73 @@
+from typing import List, Dict, Any
+from .models import Server, Flow
+
+def generate_inventory(servers: Dict[str, Server], flows: List[Flow]) -> Dict[str, Any]:
+    """
+    Build the Ansible inventory dictionary ``{'all': {'hosts': {...}}}``.
+
+    servers: Dict[Reference, Server]; only servers with an ``ip_address`` can own flows.
+    flows: List[Flow]; a flow is kept only when its ``source_ip`` equals the
+           ``ip_address`` of one of the servers, otherwise it is dropped.
+
+    Each host is keyed by its IP and holds the server's ansible vars plus a
+    'flows' list. The first five drops and a final match/drop summary are printed.
+    """
+    # Only the single IP recorded on each Server is indexed; a flow whose source
+    # uses any other address of the same machine will therefore not match.
+    server_by_ip = {srv.ip_address: srv for srv in servers.values() if srv.ip_address}
+
+    hosts = {}
+    matched = 0
+    dropped = 0
+
+    for flow in flows:
+        owner = server_by_ip.get(flow.source_ip)
+
+        if not owner:
+            dropped += 1
+            if dropped <= 5:  # Debug spam limit
+                print(f"Dropping flow {flow.flow_id}: Source {flow.source_ip} not found in Servers tab.")
+            continue
+
+        matched += 1
+
+        # The server's IP doubles as the inventory host name.
+        key = owner.ip_address
+        if key not in hosts:
+            entry = owner.get_ansible_vars()
+            entry['flows'] = []
+            hosts[key] = entry
+
+        # Exact duplicates are not filtered; every matching flow is appended.
+        hosts[key]['flows'].append({
+            'flow_id': flow.flow_id,
+            'dest': flow.destination_ip,
+            'ports': flow.ports,
+            'protocol': flow.protocol,
+        })
+
+    print(f"Inventory Generation Report: Matches={matched}, Dropped={dropped}")
+
+    return {'all': {'hosts': hosts}}
--- a/wif2ansible/main.py
+++ b/wif2ansible/main.py
@@ -0,0 +1,39 @@
+import sys
+import yaml
+import argparse
+from datetime import datetime
+from .excel_reader import read_servers, read_flows
+from .inventory import generate_inventory
+
+def main():
+    """
+    Command-line entry point.
+
+    Parses the WIF path and the optional ``--output``/``-o`` path, reads servers
+    and flows from the workbook, builds the inventory and writes it as YAML.
+    Without ``--output`` the file is named ``inventory_<YYYY-MM-DD_HHMM>.yml``
+    in the current directory.
+    """
+    cli = argparse.ArgumentParser(description="Convert WIF Excel to Ansible Inventory")
+    cli.add_argument("wif_file", help="Path to the WIF Excel file (.xlsx)")
+    cli.add_argument("--output", "-o", help="Output YAML file path", default=None)
+    args = cli.parse_args()
+
+    print(f"Reading servers from {args.wif_file}...")
+    servers = read_servers(args.wif_file)
+    print(f"Found {len(servers)} servers in allowlist.")
+
+    print("Reading flows...")
+    flows = read_flows(args.wif_file, servers)
+    print(f"Found {len(flows)} raw flows.")
+
+    print("Generating inventory...")
+    inventory = generate_inventory(servers, flows)
+
+    # An explicit --output wins; otherwise derive a timestamped default name.
+    outfile = args.output if args.output else f"inventory_{datetime.now().strftime('%Y-%m-%d_%H%M')}.yml"
+
+    with open(outfile, 'w') as out:
+        yaml.dump(inventory, out, default_flow_style=False)
+
+    print(f"Successfully wrote inventory to {outfile}")
+
+
+if __name__ == "__main__":
+    main()
--- a/wif2ansible/models.py
+++ b/wif2ansible/models.py
@@ -0,0 +1,36 @@
+from dataclasses import dataclass, field
+from typing import List, Dict, Optional, Any
+
+@dataclass
+class Server:
+    """A server row from the WIF 'Servers' tab."""
+    reference: str
+    hostname: str  # May simply repeat the reference
+    ip_address: Optional[str] = None
+    platform: str = 'unknown'  # e.g. 'Windows', 'Linux'
+
+    def get_ansible_vars(self) -> Dict[str, Any]:
+        """
+        Return the connection variables implied by the platform.
+
+        Platforms containing 'win' (case-insensitive) get WinRM settings;
+        every other platform gets an empty dict, i.e. Ansible's defaults.
+        """
+        if 'win' not in self.platform.lower():
+            return {}
+        return {
+            'ansible_connection': 'winrm',
+            'ansible_winrm_transport': 'ntlm',
+            'ansible_winrm_port': 5985,
+            # Common default; certificate validation is disabled for these hosts
+            'ansible_winrm_server_cert_validation': 'ignore',
+        }
+
+@dataclass
+class Flow:
+    """One source-to-destination connection requirement read from a flow tab."""
+    flow_id: str
+    source_ip: str
+    destination_ip: str
+    ports: List[int]
+    protocol: str = 'tcp'
+
+    def __hash__(self):
+        # Ports are hashed in sorted order, so their list order does not affect the hash.
+        identity = (
+            self.flow_id,
+            self.source_ip,
+            self.destination_ip,
+            tuple(sorted(self.ports)),
+            self.protocol,
+        )
+        return hash(identity)
--- a/wif2ansible/parsers.py
+++ b/wif2ansible/parsers.py
@@ -0,0 +1,74 @@
+import re
+from typing import List
+
+def clean_header(header: str) -> str:
+    """Normalise a header cell: drop HTML-like tags, trim, lower-case ('' for falsy input)."""
+    if header:
+        # Tag stripping carried over from the Ruby script's header clean-up
+        without_tags = re.sub(r'<[^>]+>', '', str(header))
+        return without_tags.strip().lower()
+    return ""
+
+def parse_ports(port_str: str) -> List[int]:
+    """
+    Parse a cell containing ports, port ranges, or 'any'/'all'.
+
+    The text is lower-cased, every 'udp' is removed, and the remainder is split
+    on commas and whitespace. Tokens of the form ``N`` or ``N-M`` are accepted
+    when they lie within 1-65535 (ranges are expanded, bounds inclusive);
+    anything else is ignored. If the text contains 'any' or 'all', a fixed list
+    of commonly used ports is returned instead.
+
+    Returns a sorted list of unique integer ports ([] for empty input).
+    """
+    if not port_str:
+        return []
+
+    s = str(port_str).lower()
+
+    # The Ruby script dropped the protocol marker; do the same so only numbers remain.
+    s = re.sub(r'udp', '', s)
+
+    # 'any' / 'all' - default to common ports as per Ruby script
+    if 'any' in s or 'all' in s:
+        return [22, 3389, 80, 443, 3306, 5432, 8443, 60000]
+
+    ports = set()
+    for part in re.split(r'[,\n\s]+', s):
+        if not part:
+            continue
+
+        # Range handling: 8000-8010
+        range_match = re.match(r'^([0-9]+)-([0-9]+)$', part)
+        if range_match:
+            start, end = map(int, range_match.groups())
+            # Requiring real port numbers also caps the expansion at 65535 entries,
+            # so a typo such as '1-9999999999' cannot exhaust memory.
+            if 1 <= start <= end <= 65535:
+                ports.update(range(start, end + 1))
+            continue
+
+        # Single port. ASCII digits only: str.isdigit() also accepts characters
+        # such as superscripts, which int() then rejects with ValueError.
+        if re.match(r'^[0-9]+$', part) and 1 <= int(part) <= 65535:
+            ports.add(int(part))
+
+    return sorted(ports)
+
+def parse_ip(ip_str: str) -> List[str]:
+    """
+    Find all IPv4 addresses in a string.
+
+    Returns the unique addresses in order of first appearance, so callers that
+    take the first element get the first address written in the cell.
+    Dotted quads with an octet above 255 are not addresses and are ignored.
+    """
+    if not ip_str:
+        return []
+
+    candidates = re.findall(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b', str(ip_str))
+    valid = (
+        ip for ip in candidates
+        if all(int(octet) <= 255 for octet in ip.split('.'))
+    )
+    # dict.fromkeys de-duplicates while keeping insertion order; a set would
+    # return the addresses in a hash-dependent, run-to-run varying order.
+    return list(dict.fromkeys(valid))
--- a/wif2ansibleinventory.rb
+++ b/wif2ansibleinventory.rb
@@ -0,0 +1,417 @@
+#!/usr/bin/ruby
+
+#USAGE NOTES
+#bundle exec ruby .\wif2ansible.rb TAS000000535469.xlsx
+
+require 'roo'
+require 'yaml'
+require 'resolv'
+require 'uri'
+require 'socket'
+
+OSCACHE = {}
+UNREACHABLE_HOSTS = []
+
+
+# Reopens the core Hash class to add lookups by key pattern. In every helper,
+# +find+ is interpolated into a case-insensitive regex that is matched
+# against each key converted to a string.
+class Hash
+  # Returns the sub-hash of all entries whose key matches +find+
+  # (despite the name, every match is kept, not only the first).
+  def fuzzy_find_first(find)
+    select { |key, value| key.to_s.match(/#{find}/i) }
+  end
+  # Returns the first element of the flattened values of all matching entries,
+  # or nil when no key matches.
+  def find_first(find)
+    select { |key, value| key.to_s.match(/#{find}/i) }.values.flatten.first
+  end
+  # Returns the values of all matching entries flattened into a single array.
+  def find_custom(find)
+    select { |key, value| key.to_s.match(/#{find}/i) }.values.flatten
+  end
+end
+
+# Command-line arguments:
+#   ARGV[0] - path to the WIF workbook
+#   ARGV[1] - optional sheet-name pattern (defaults to "flow app")
+#   ARGV[2] - optional extra switches for portwas (e.g. -n)
+wif_file = ARGV[0]
+command_sheet_name = ARGV[1] ? ARGV[1] : "flow app"
+portwass_switches = ARGV[2] ? ARGV[2] : "" #optionally add switches to portwas like -n
+
+begin
+  XLSX = Roo::Spreadsheet.open(wif_file, only_visible_sheets: true)
+rescue StandardError => e
+  excel_fix_guide = %{
+  1. Delete Sheet: Diagram
+  2. Delete Sheet: Notes and Exceptions
+  3. Save as xls
+  4. Close Excel completely
+  5. Open xls file
+  6. Run document inspect, delete hidden rows
+  7. Save the xls as xlsx}
+  puts "ERROR: could not open #{wif_file.inspect}: #{e.message}"
+  puts excel_fix_guide
+  # XLSX was never assigned, so every later step would die with a NameError;
+  # stop here with a failure status instead.
+  exit 1
+end
+
+#class Common
+# Returns the names of all workbook sheets matching +name+ case-insensitively;
+# each space in +name+ stands for "any run of characters".
+def find_sheet_name(name)
+  pattern = /#{name.gsub(' ', '.*')}/i
+  XLSX.sheets.select { |sheet_name| sheet_name.scan(pattern).any? }
+end
+
+# Strips HTML-like tags from +string+.
+# For a String: splits on <...> tags, trims the pieces, drops empty ones and
+# joins them with single spaces; then removes whitespace before commas,
+# removes '*' characters and trims the result.
+# Any non-String argument is returned unchanged.
+def remove_html(string)
+  if string.class == String
+    string.split(/\<.*?\>/)
+     .map(&:strip)
+     .reject(&:empty?)
+     .join(' ')
+     .gsub(/\s,/,',').gsub('*', '').strip
+   else
+    string
+   end
+end
+
+# Decides whether +servername+ is a Windows host by probing TCP port 3389.
+# Verdicts are cached in OSCACHE as 'win', 'lin' or false.
+# Returns true only when the 3389 connection succeeds (or 'win' is cached).
+# Only ECONNREFUSED and IO::TimeoutError are handled here; any other
+# exception raised by TCPSocket.new propagates to the caller.
+def is_windows?(servername)
+  # Answer from the cache when the OS is already known.
+  if OSCACHE[servername] == 'win'
+    return true
+  elsif OSCACHE[servername] == 'lin'
+    return false
+  #elsif OSCACHE[servername] == false
+  #  puts "Timeout for #{servername}, skipping..."
+  #  return false
+  end
+  
+  attempts = 0
+  begin
+    attempts+=1
+    s = TCPSocket.new servername, 3389
+    OSCACHE[servername] = 'win' if s
+    return true if s
+  rescue Errno::ECONNREFUSED
+    puts "#{servername}: Port 3389 not open, #{servername} is not a windows server"
+    s = nil
+    # Fall back to port 22: success caches 'lin'; any error caches false and
+    # records the host as unreachable.
+    begin; s = TCPSocket.new servername, 22; rescue;OSCACHE[servername] = false; UNREACHABLE_HOSTS << servername;end;
+    OSCACHE[servername] = 'lin' if s
+    return false
+  rescue IO::TimeoutError
+    # Re-run the begin block until three attempts have been made.
+    retry unless attempts > 2
+    puts "#{servername}: IO Timeout to #{servername}. You may not be connected to the correct EDC. Please connect your VPN or run from a JUMP server in the correct EDC"
+    OSCACHE[servername] = false
+    return false
+  end
+end
+
+# Decides whether +servername+ is a Linux host by probing TCP port 22.
+# Verdicts are cached in OSCACHE as 'lin', 'win' or false.
+# Returns true only when the port 22 connection succeeds (or 'lin' is cached).
+# Only ECONNREFUSED and IO::TimeoutError are handled here; any other
+# exception raised by TCPSocket.new propagates to the caller.
+def is_linux?(servername)
+  # Answer from the cache when the OS is already known.
+  if OSCACHE[servername] == 'lin'
+    return true
+  elsif OSCACHE[servername] == 'win'
+    return false
+  #elsif OSCACHE[servername] == false
+  #  puts "Timeout for #{servername}, skipping..."
+  #  return false
+  end
+
+  attempts = 0
+  begin
+    attempts+=1
+    s = TCPSocket.new servername, 22
+    OSCACHE[servername] = 'lin'
+    return true if s
+  rescue Errno::ECONNREFUSED
+    puts "#{servername}: Port 22 not open, #{servername} is not a linux server"
+    s = nil
+    # Fall back to port 3389: success caches 'win'; any error caches false and
+    # records the host as unreachable.
+    begin; s = TCPSocket.new servername, 3389; rescue;OSCACHE[servername] = false; UNREACHABLE_HOSTS << servername;end;
+    OSCACHE[servername] = 'win' if s
+    return false
+  rescue IO::TimeoutError
+    # Re-run the begin block until three attempts have been made.
+    retry unless attempts > 2
+    puts "#{servername}: IO Timeout to #{servername}. You may not be connected to the correct EDC. Please connect your VPN or run from a JUMP server in the correct EDC"
+    OSCACHE[servername] = false
+    return false
+  end
+end
+
+# Finds the first entry of +row+ (header => cell value) whose value and key
+# are non-nil and whose key, with newlines removed, matches +column+
+# case-insensitively (spaces in +column+ match any run of characters).
+# Returns [key, cell_value_to_array(value)] for that entry.
+# NOTE: when nothing matches, the result of +row.each+ (the row itself) is
+# returned, so callers that index the result with [1] do not get a pair.
+def select_value_from_row(row, column)
+  row.each{|k,v| return [k,cell_value_to_array(v)] if not v.nil? and not k.nil? and k.gsub("\n", '').scan(/#{column.gsub(' ', '.*')}/i).any? }
+end
+
+# Converts a cell value to an array of non-blank tokens.
+# The value is stringified and split on any character of the class
+# [\n, " ", ","] - i.e. newline, comma, space and the double-quote character;
+# tokens that are empty once spaces are removed are discarded.
+def cell_value_to_array(value)
+  value.to_s.split(/[\n, " ", ","]/).compact.keep_if{|a| a.gsub(' ', '') != "" }
+end
+#end #Common
+
+#class Flow
+
+# Treats a flow row as empty/example when at least five of its values are nil,
+# or when the first column's value is a String or nil.
+# NOTE: a method with the same name is defined again further down in this
+# file; Ruby keeps the later definition, so this version is never the one called.
+def is_empty_or_example_flow_row?(row)
+  nil_count = 0
+  nil_count_limit = 5
+  row.each do |k,v|
+    nil_count = nil_count + 1 if v.nil? 
+  end
+  if nil_count >= nil_count_limit or row.first[1].class == String or row.first[1].nil?
+    return true
+  else
+    return false 
+  end
+end
+
+# Parses the flow sheet matching +name+ into rows, letting Roo locate the
+# header row from the patterns in flow_header_items.
+# The LAST matching sheet is tried first; if Roo cannot find the header row
+# there, the FIRST matching sheet is tried. If that fails too, an explanation
+# is printed and the script exits.
+def get_all_rows_and_find_headers(name)
+  flow_header_items = [/flow/i, /source/i, /destination/i, /public/i, /ip/i, /private/i, /port/i]
+  begin
+    sheet_name = find_sheet_name(name).last
+    puts "Using sheet: #{sheet_name}"
+    XLSX.sheet(sheet_name).parse(header_search: flow_header_items)
+  rescue Roo::HeaderRowNotFoundError
+    sheet_name = find_sheet_name(name).first
+    puts "ERROR: POSSIBLE EXTRA SHEET, trying to fix...\nUsing sheet: #{sheet_name}"
+    begin
+      XLSX.sheet(sheet_name).parse(header_search: flow_header_items)
+    rescue Roo::HeaderRowNotFoundError
+      puts "ERROR: Flow sheet table header names are incorrect in provided WIF. This script is looking for the following words: #{flow_header_items}. Fix this in Excel and use your modified WIF file."
+      puts "Specifically, I want to see 'Source Public IP' 'Source Private IP' 'Destination Public IP' 'Destination Private IP'. Add columns if they have been deleted."
+      exit 
+    end
+  end
+end
+
+# Returns +value+ as a string with every "<2-5 digits><any one char>udp"
+# occurrence (case-insensitive) removed, e.g. "53/udp".
+def remove_udp_ports(value)
+  text = value.to_s
+  text.gsub(/\d{2,5}.{1}udp/i, '')
+end
+
+# Extracts port numbers from a cell value.
+# UDP entries are removed first (remove_udp_ports); every remaining run of
+# 2-5 digits is then taken as a port, so a range such as "8000-8010" yields
+# only its two end points (the range expansion below is commented out).
+# If the text mentions "any"/"all" and contains no numbers, a fixed list of
+# frequently used ports is returned instead.
+def parse_ports(value)
+  value = remove_udp_ports(value)
+  value = value.to_s if value.class == Array
+  #port_ranges = value.scan(/\d{2,5}-\d{2,5}|\d{2,5} - \d{2,5}/) 
+  port_numbers = [value.scan(/\d{2,5}/)].flatten.map{|port| port.to_i}
+  #if port_ranges.any?
+  #  port_numbers = [(port_numbers + port_ranges)].flatten.compact.map{|range| range.to_s.split('-') }
+  #  port_numbers = [port_numbers].flatten!.uniq!.map{|port| port.to_i}
+  #end
+  if value.scan(/any|all/i).any? && !port_numbers.any?
+    return [22,3389,80,443,3306,5432,8443,60000] #return some frequently used ports if they requested all/any
+  else
+    return port_numbers
+  end
+end
+
+
+# Treats a flow row as empty/example when it has no source IP value, no
+# destination IP value (private and public columns combined in each case),
+# or when the first column's value is a String or nil.
+# This definition replaces the earlier method of the same name in this file.
+def is_empty_or_example_flow_row?(row)
+  # Collect whatever the private and public source columns contain.
+  all_source_ips = []
+  all_source_ips << select_value_from_row(row, 'source private ip')[1]
+  all_source_ips << select_value_from_row(row, 'source public ip')[1]
+  all_source_ips = all_source_ips.flatten.compact
+
+  # Same for the destination columns.
+  all_destination_ips = []
+  all_destination_ips << select_value_from_row(row, 'destination private ip')[1]
+  all_destination_ips << select_value_from_row(row, 'destination public ip')[1]
+  all_destination_ips = all_destination_ips.flatten.compact
+
+  if !all_source_ips.any? or !all_destination_ips.any? or row.first[1].class == String or row.first[1].nil?
+    return true
+  else
+    return false 
+  end
+end
+
+#only flows that contain source ip and destination ip
+# Returns the rows of the flow sheet matching +sheet_name+ that are not
+# empty/example rows (see is_empty_or_example_flow_row?).
+# A TypeError while reading the sheet is reported as a missing sheet and
+# terminates the script.
+def testable_flow_rows(sheet_name)
+  begin 
+    get_all_rows_and_find_headers(sheet_name).keep_if{|a| !is_empty_or_example_flow_row?(a) }
+  rescue TypeError
+    puts "ERROR: Problem accessing sheet with '#{sheet_name}' in the name. Does this sheet exist?"
+    exit
+  end
+end
+
+# Normalises parsed flow rows. For every row a new hash is built in which:
+#   * keys have HTML removed and spaces/newlines replaced by underscores
+#     (e.g. "Source Public IP" becomes "Source_Public_IP");
+#   * values of columns whose key contains "port" become arrays of port numbers;
+#   * Integer values are kept as they are;
+#   * values containing IPv4-looking text become arrays of those addresses;
+#   * every other value becomes an array of its non-blank tokens.
+def cleanup_flow_formatting(rows)
+  rows.map do |row|
+    result = {}
+    row.each do |k,v|
+      cleaned_key = remove_html(k).to_s.gsub(" ", "_").gsub("\n", "_")
+      if cleaned_key.scan(/port/i).any?
+        cleaned_value = parse_ports(remove_html(v))
+      elsif v.class == Integer
+        cleaned_value = v
+      elsif v.to_s.scan(/\b[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b/).any? #ip addresses
+        cleaned_value = cell_value_to_array(remove_html(v)).to_s.scan(/\b[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b/)
+      else
+        cleaned_value = cell_value_to_array(remove_html(v))
+      end
+      result[cleaned_key] = cleaned_value
+    end
+    result
+  end
+end
+#end #Flow
+
+
+#class Server
+# Treats a server row as empty/example when the lookup of its 'cpu' or 'ram'
+# column yields nil, or when the 'server reference' lookup equals "Example".
+# NOTE(review): select_value_from_row returns an array of tokens for a matched
+# column, so the comparison with the String "Example" may never be true - confirm.
+def is_empty_or_example_server_row?(row)
+  if select_value_from_row(row, 'cpu')[1].nil? || select_value_from_row(row, 'ram')[1].nil? || select_value_from_row(row, 'server reference')[1] == "Example"
+    return true
+  else
+    return false 
+  end
+end
+
+  # Returns the header names (the keys of the first parsed row) of the first
+  # sheet whose name matches 'server'.
+  def find_header_in_server_sheet
+    sheet_name = find_sheet_name('server').first
+    puts "Using sheet: #{sheet_name}"
+    XLSX.sheet(sheet_name).parse(header_search: [/reference/i, /type/i, /alias/i, /platform/i, /middleware/i ]).first.map{|k,v| k}
+  end
+
+  # Returns the rows of the first sheet whose name matches 'server', without
+  # the rows rejected by is_empty_or_example_server_row?.
+  # The sheet is parsed again on every call.
+  def all_servers
+    sheet_name = find_sheet_name('server').first
+    XLSX.sheet(sheet_name).parse(header_search: [/reference/i, /type/i, /alias/i, /platform/i, /middleware/i ]).keep_if{|a| !is_empty_or_example_server_row?(a) }
+  end
+
+  # Looks up the server rows whose reference matches +server_reference+
+  # (used as a case-insensitive regex) and returns the sorted values of the
+  # first matching row's columns named like "ip...address".
+  # Returns [] when no row matches.
+  def reference_to_ip(server_reference)
+    matching = []
+    all_servers.map do |row|
+      this_reference = select_value_from_row(row, 'reference')
+      # this_reference is [header, tokens]; the first token is compared.
+      if this_reference[1][0].scan(/#{server_reference}/i).any?
+        matching << row
+      end
+    end
+    return [] if !matching.any?
+    return matching.first.keep_if{|k,v| k.scan(/ip.*address/i).any? if !k.nil? and !v.nil? }.map{|k,v| v}.sort!
+  end
+
+
+  # Maps +name+ to its management DNS name.
+  # +name+ is resolved to a name with Resolv.getname; the first label of that
+  # name is combined with '.ds.gc.ca' and resolved to an address. If any of
+  # that raises Resolv::ResolvError, the same is tried with '.pre-ds.gc.ca'.
+  # Returns the management DNS NAME (not the address) when an address longer
+  # than four characters was resolved; otherwise returns nil.
+  def to_mgt_ip(name)
+    begin
+      fqdn = Resolv.getname(name)
+      mgt_dns = fqdn.split('.').first + '.ds.gc.ca'
+      mgt_ip = Resolv.getaddress(mgt_dns)
+    rescue Resolv::ResolvError
+      begin
+        puts "#{name} not found in ds.gc.ca, checking pre-ds.gc.ca..."
+        fqdn = Resolv.getname(name)
+        mgt_dns = fqdn.split('.').first + '.pre-ds.gc.ca'
+        mgt_ip = Resolv.getaddress(mgt_dns)
+      rescue Resolv::ResolvError
+        puts "#{name} is not a server OR no DNS entries exist in ds.gc.ca or pre-ds.gc.ca, skipping source..."
+      end
+    end
+    #return mgt_ip
+    # mgt_ip stays nil when both lookups failed, making this return nil.
+    return mgt_dns if mgt_ip.to_s.length > 4
+  end
+
+
+  # Wraps +flows_by_host+ in the all/hosts inventory structure and
+  # returns it serialised as YAML.
+  def flows_by_host_to_ansible_inventory_yaml(flows_by_host)
+    inventory = { "all" => { "hosts" => flows_by_host } }
+    return inventory.to_yaml
+  end
+
+#class Result
+  # Extracts [port, status] pairs from portwas output: every line starting
+  # with 2-5 digits, a colon, a space and a four-character word.
+  def parse_portwass(stdout)
+    result_line = /^(\d{2,5}):\ (\w{4})/
+    stdout.scan(result_line)
+  end
+#end
+
+# Collect the flow rows to process.
+# A sheet pattern given as ARGV[1] is mandatory once supplied (failure exits);
+# the standard 'flow app' and 'flow man' sheets are best-effort additions.
+ansible_inventory_hash ={}
+
+ansible_tasks = []
+flows = []
+if ARGV[1]
+  begin
+    flows = cleanup_flow_formatting(testable_flow_rows(ARGV[1]))
+  rescue
+    puts "INFO: unable to parse sheet containing '#{ARGV[1]}'"
+    exit
+  end
+end
+begin
+  flows << cleanup_flow_formatting(testable_flow_rows('flow app'))
+rescue
+  puts "INFO: unable to parse sheet containing 'flow app'"
+end
+begin
+  flows << cleanup_flow_formatting(testable_flow_rows('flow man'))
+rescue
+  puts "INFO: unable to parse sheet containing 'flow man'"
+end
+# "<<" appended whole arrays of rows; flatten them into one list of row hashes.
+flows.flatten!
+
+# Build the inventory: one host entry per resolvable source server and one
+# flow entry per (source, destination) pair of every flow row.
+results = {}
+mgt_ip_list = []
+failed_portwass_cmds = []
+flows_count = 0
+flows.each do |flow|
+  puts "\n\n#{'#'*8} Parsing flow number: #{flow['Flow_#']} #{'#'*8}"
+  if results[flow['Flow_#']].nil?
+    results[flow['Flow_#']] = {}
+    results[flow['Flow_#']]['connections'] = []
+  end
+  # All four IP columns are dereferenced below, so all four must exist
+  # (including "Destination_Public_IP").
+  if flow["Source_Public_IP"].nil? || flow["Source_Private_IP"].nil? || flow["Destination_Private_IP"].nil? || flow["Destination_Public_IP"].nil?
+    puts "ERROR IN SPREADSHEET:\n\n Please ensure there are columns named (case sensitive) \"Source Private IP\", \"Source Public IP\", \"Destination Private IP\", \"Destination Public IP\"\n\n Please update the names of columns and possibly add empty columns with these names if they have been combined."
+    exit
+  elsif flow["Flow_#"].nil?
+    puts "ERROR IN SPREADSHEET:\n\n Please ensure the Flow # column is named (case sensitive) \"Flow #\""
+    exit
+  end
+  # Public addresses take precedence over private ones, for source and destination alike.
+  flow_src_ips =  flow["Source_Public_IP"].any? ? flow["Source_Public_IP"] : flow["Source_Private_IP"]
+  flow_src_ips.each do |src_ip|
+    mgt_ip = to_mgt_ip(src_ip)
+    mgt_ip_list << mgt_ip
+    flow_dst_ips = flow["Destination_Public_IP"].any? ? flow["Destination_Public_IP"] : flow["Destination_Private_IP"]
+    # Written as a block: in `puts "..." && next if cond` the `next` runs
+    # before puts receives its argument, so the message was never printed.
+    if flow_dst_ips.nil? || !flow_dst_ips.any?
+      puts "Destination IPs empty for flow #{flow['Flow_#']}, skipping"
+      next
+    end
+    flow_dst_ips.each do |dst_ip|
+      if mgt_ip.to_s.length < 3
+        puts "skipping #{mgt_ip} #{src_ip} as I don't think it's a windows/linux server"
+        next
+      end
+      if ansible_inventory_hash[mgt_ip].nil?
+        if is_linux? mgt_ip
+          ansible_inventory_hash.merge!({mgt_ip => {"flows" => []}  })
+        elsif is_windows? mgt_ip
+          ansible_inventory_hash.merge!({mgt_ip => {"ansible_connection" => "winrm", "ansible_winrm_transport" => "ntlm", "ansible_winrm_port" => 5985, "flows" => []}  })
+        else
+          #add to list of unreachable hosts to output at end
+          UNREACHABLE_HOSTS << mgt_ip
+          UNREACHABLE_HOSTS << src_ip
+        end
+      end
+      begin
+        a ={ "flow_number" => flow['Flow_#'], "dest" => dst_ip, "ports" => [flow.find_custom("Port")].flatten.uniq }
+        # For an unreachable host no entry exists, so this raises NoMethodError
+        # on nil and the flow is reported as skipped below.
+        ansible_inventory_hash[mgt_ip]["flows"] << a
+        puts "#{mgt_ip} : #{a}"
+        flows_count +=1
+      rescue NoMethodError
+        puts "SKIPPING ERROR: #{mgt_ip} flow #{flow['Flow_#']}"
+      end
+    end#dst
+  end#src
+end#flows
+
+
+# Print the inventory, write it to a timestamped YAML file and summarise the run.
+puts ansible_inventory_hash
+inventory_yaml = flows_by_host_to_ansible_inventory_yaml(ansible_inventory_hash)
+puts inventory_yaml
+
+# ".*" strips the whole extension including its dot; the bare suffix "xlsx"
+# left a trailing "." in the generated name. The stem is capped at 23 characters.
+ansible_inventory_filename = "#{File.basename(wif_file, ".*")[0..22]}_inventory_#{Time.now.strftime("%d-%m-%Y_%H.%M")}.yml"
+File.open ansible_inventory_filename, 'w' do |file|
+  file.write inventory_yaml
+end
+
+puts "="*20
+puts "Source servers found: #{ansible_inventory_hash.count}"
+
+puts "Total connections: #{flows_count}"
+puts "Generated inventory: #{ansible_inventory_filename}"
+puts "="*20
+
+unreachable = UNREACHABLE_HOSTS.compact.uniq
+if unreachable.any?
+  puts "The following [#{unreachable.count}] servers could not be reached. Either they are in another datacentre or arent windows/linux servers:"
+  puts unreachable.join("\n")
+end
+
+