EVOLUTION-MANAGER
Edit File: vmware_cluster_ha.py
#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright: (c) 2015, Joseph Callen <jcallen () csc.com> # Copyright: (c) 2018, Ansible Project # # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) from __future__ import absolute_import, division, print_function __metaclass__ = type ANSIBLE_METADATA = { 'metadata_version': '1.1', 'status': ['preview'], 'supported_by': 'community' } DOCUMENTATION = r''' --- module: vmware_cluster_ha short_description: Manage High Availability (HA) on VMware vSphere clusters description: - Manages HA configuration on VMware vSphere clusters. - All values and VMware object names are case sensitive. version_added: '2.9' author: - Joseph Callen (@jcpowermac) - Abhijeet Kasurde (@Akasurde) requirements: - Tested on ESXi 5.5 and 6.5. - PyVmomi installed. options: cluster_name: description: - The name of the cluster to be managed. type: str required: yes datacenter: description: - The name of the datacenter. type: str required: yes aliases: [ datacenter_name ] enable_ha: description: - Whether to enable HA. type: bool default: 'no' ha_host_monitoring: description: - Whether HA restarts virtual machines after a host fails. - If set to C(enabled), HA restarts virtual machines after a host fails. - If set to C(disabled), HA does not restart virtual machines after a host fails. - If C(enable_ha) is set to C(no), then this value is ignored. type: str choices: [ 'enabled', 'disabled' ] default: 'enabled' ha_vm_monitoring: description: - State of virtual machine health monitoring service. - If set to C(vmAndAppMonitoring), HA response to both virtual machine and application heartbeat failure. - If set to C(vmMonitoringDisabled), virtual machine health monitoring is disabled. - If set to C(vmMonitoringOnly), HA response to virtual machine heartbeat failure. - If C(enable_ha) is set to C(no), then this value is ignored. type: str choices: ['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled'] default: 'vmMonitoringDisabled' host_isolation_response: description: - Indicates whether or VMs should be powered off if a host determines that it is isolated from the rest of the compute resource. - If set to C(none), do not power off VMs in the event of a host network isolation. - If set to C(powerOff), power off VMs in the event of a host network isolation. - If set to C(shutdown), shut down VMs guest operating system in the event of a host network isolation. type: str choices: ['none', 'powerOff', 'shutdown'] default: 'none' slot_based_admission_control: description: - Configure slot based admission control policy. - C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive. suboptions: failover_level: description: - Number of host failures that should be tolerated. type: int required: true type: dict reservation_based_admission_control: description: - Configure reservation based admission control policy. - C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive. suboptions: failover_level: description: - Number of host failures that should be tolerated. type: int required: true auto_compute_percentages: description: - By default, C(failover_level) is used to calculate C(cpu_failover_resources_percent) and C(memory_failover_resources_percent). If a user wants to override the percentage values, he has to set this field to false. type: bool default: true cpu_failover_resources_percent: description: - Percentage of CPU resources in the cluster to reserve for failover. Ignored if C(auto_compute_percentages) is not set to false. type: int default: 50 memory_failover_resources_percent: description: - Percentage of memory resources in the cluster to reserve for failover. Ignored if C(auto_compute_percentages) is not set to false. type: int default: 50 type: dict failover_host_admission_control: description: - Configure dedicated failover hosts. - C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive. suboptions: failover_hosts: description: - List of dedicated failover hosts. type: list required: true type: dict ha_vm_failure_interval: description: - The number of seconds after which virtual machine is declared as failed if no heartbeat has been received. - This setting is only valid if C(ha_vm_monitoring) is set to, either C(vmAndAppMonitoring) or C(vmMonitoringOnly). - Unit is seconds. type: int default: 30 ha_vm_min_up_time: description: - The number of seconds for the virtual machine's heartbeats to stabilize after the virtual machine has been powered on. - Valid only when I(ha_vm_monitoring) is set to either C(vmAndAppMonitoring) or C(vmMonitoringOnly). - Unit is seconds. type: int default: 120 ha_vm_max_failures: description: - Maximum number of failures and automated resets allowed during the time that C(ha_vm_max_failure_window) specifies. - Valid only when I(ha_vm_monitoring) is set to either C(vmAndAppMonitoring) or C(vmMonitoringOnly). type: int default: 3 ha_vm_max_failure_window: description: - The number of seconds for the window during which up to C(ha_vm_max_failures) resets can occur before automated responses stop. - Valid only when I(ha_vm_monitoring) is set to either C(vmAndAppMonitoring) or C(vmMonitoringOnly). - Unit is seconds. - Default specifies no failure window. type: int default: -1 ha_restart_priority: description: - Priority HA gives to a virtual machine if sufficient capacity is not available to power on all failed virtual machines. - Valid only if I(ha_vm_monitoring) is set to either C(vmAndAppMonitoring) or C(vmMonitoringOnly). - If set to C(disabled), then HA is disabled for this virtual machine. - If set to C(high), then virtual machine with this priority have a higher chance of powering on after a failure, when there is insufficient capacity on hosts to meet all virtual machine needs. - If set to C(medium), then virtual machine with this priority have an intermediate chance of powering on after a failure, when there is insufficient capacity on hosts to meet all virtual machine needs. - If set to C(low), then virtual machine with this priority have a lower chance of powering on after a failure, when there is insufficient capacity on hosts to meet all virtual machine needs. type: str default: 'medium' choices: [ 'disabled', 'high', 'low', 'medium' ] extends_documentation_fragment: vmware.documentation ''' EXAMPLES = r""" - name: Enable HA without admission control vmware_cluster_ha: hostname: '{{ vcenter_hostname }}' username: '{{ vcenter_username }}' password: '{{ vcenter_password }}' datacenter_name: datacenter cluster_name: cluster enable_ha: yes delegate_to: localhost - name: Enable HA and VM monitoring without admission control vmware_cluster_ha: hostname: "{{ vcenter_hostname }}" username: "{{ vcenter_username }}" password: "{{ vcenter_password }}" validate_certs: no datacenter_name: DC0 cluster_name: "{{ cluster_name }}" enable_ha: True ha_vm_monitoring: vmMonitoringOnly delegate_to: localhost - name: Enable HA with admission control reserving 50% of resources for HA vmware_cluster_ha: hostname: '{{ vcenter_hostname }}' username: '{{ vcenter_username }}' password: '{{ vcenter_password }}' datacenter_name: datacenter cluster_name: cluster enable_ha: yes reservation_based_admission_control: auto_compute_percentages: False failover_level: 1 cpu_failover_resources_percent: 50 memory_failover_resources_percent: 50 delegate_to: localhost """ RETURN = r"""# """ try: from pyVmomi import vim, vmodl except ImportError: pass from ansible.module_utils.basic import AnsibleModule from ansible.module_utils.vmware import (PyVmomi, TaskError, find_datacenter_by_name, vmware_argument_spec, wait_for_task) from ansible.module_utils._text import to_native class VMwareCluster(PyVmomi): def __init__(self, module): super(VMwareCluster, self).__init__(module) self.cluster_name = module.params['cluster_name'] self.datacenter_name = module.params['datacenter'] self.enable_ha = module.params['enable_ha'] self.datacenter = None self.cluster = None self.host_isolation_response = getattr(vim.cluster.DasVmSettings.IsolationResponse, self.params.get('host_isolation_response')) if self.enable_ha and ( self.params.get('slot_based_admission_control') or self.params.get('reservation_based_admission_control') or self.params.get('failover_host_admission_control')): self.ha_admission_control = True else: self.ha_admission_control = False self.datacenter = find_datacenter_by_name(self.content, self.datacenter_name) if self.datacenter is None: self.module.fail_json(msg="Datacenter %s does not exist." % self.datacenter_name) self.cluster = self.find_cluster_by_name(cluster_name=self.cluster_name) if self.cluster is None: self.module.fail_json(msg="Cluster %s does not exist." % self.cluster_name) def get_failover_hosts(self): """ Get failover hosts for failover_host_admission_control policy Returns: List of ESXi hosts sorted by name """ policy = self.params.get('failover_host_admission_control') hosts = [] all_hosts = dict((h.name, h) for h in self.get_all_hosts_by_cluster(self.cluster_name)) for host in policy.get('failover_hosts'): if host in all_hosts: hosts.append(all_hosts.get(host)) else: self.module.fail_json(msg="Host %s is not a member of cluster %s." % (host, self.cluster_name)) hosts.sort(key=lambda h: h.name) return hosts def check_ha_config_diff(self): """ Check HA configuration diff Returns: True if there is diff, else False """ das_config = self.cluster.configurationEx.dasConfig if das_config.enabled != self.enable_ha: return True if self.enable_ha and ( das_config.vmMonitoring != self.params.get('ha_vm_monitoring') or das_config.hostMonitoring != self.params.get('ha_host_monitoring') or das_config.admissionControlEnabled != self.ha_admission_control or das_config.defaultVmSettings.restartPriority != self.params.get('ha_restart_priority') or das_config.defaultVmSettings.isolationResponse != self.host_isolation_response or das_config.defaultVmSettings.vmToolsMonitoringSettings.vmMonitoring != self.params.get('ha_vm_monitoring') or das_config.defaultVmSettings.vmToolsMonitoringSettings.failureInterval != self.params.get('ha_vm_failure_interval') or das_config.defaultVmSettings.vmToolsMonitoringSettings.minUpTime != self.params.get('ha_vm_min_up_time') or das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailures != self.params.get('ha_vm_max_failures') or das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailureWindow != self.params.get('ha_vm_max_failure_window')): return True if self.ha_admission_control: if self.params.get('slot_based_admission_control'): policy = self.params.get('slot_based_admission_control') if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverLevelAdmissionControlPolicy) or \ das_config.admissionControlPolicy.failoverLevel != policy.get('failover_level'): return True elif self.params.get('reservation_based_admission_control'): policy = self.params.get('reservation_based_admission_control') auto_compute_percentages = policy.get('auto_compute_percentages') if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverResourcesAdmissionControlPolicy) or \ das_config.admissionControlPolicy.autoComputePercentages != auto_compute_percentages or \ das_config.admissionControlPolicy.failoverLevel != policy.get('failover_level'): return True if not auto_compute_percentages: if das_config.admissionControlPolicy.cpuFailoverResourcesPercent != policy.get('cpu_failover_resources_percent') or \ das_config.admissionControlPolicy.memoryFailoverResourcesPercent != policy.get('memory_failover_resources_percent'): return True elif self.params.get('failover_host_admission_control'): policy = self.params.get('failover_host_admission_control') if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverHostAdmissionControlPolicy): return True das_config.admissionControlPolicy.failoverHosts.sort(key=lambda h: h.name) if das_config.admissionControlPolicy.failoverHosts != self.get_failover_hosts(): return True return False def configure_ha(self): """ Manage HA Configuration """ changed, result = False, None if self.check_ha_config_diff(): if not self.module.check_mode: cluster_config_spec = vim.cluster.ConfigSpecEx() cluster_config_spec.dasConfig = vim.cluster.DasConfigInfo() cluster_config_spec.dasConfig.enabled = self.enable_ha if self.enable_ha: vm_tool_spec = vim.cluster.VmToolsMonitoringSettings() vm_tool_spec.enabled = True vm_tool_spec.vmMonitoring = self.params.get('ha_vm_monitoring') vm_tool_spec.failureInterval = self.params.get('ha_vm_failure_interval') vm_tool_spec.minUpTime = self.params.get('ha_vm_min_up_time') vm_tool_spec.maxFailures = self.params.get('ha_vm_max_failures') vm_tool_spec.maxFailureWindow = self.params.get('ha_vm_max_failure_window') das_vm_config = vim.cluster.DasVmSettings() das_vm_config.restartPriority = self.params.get('ha_restart_priority') das_vm_config.isolationResponse = self.host_isolation_response das_vm_config.vmToolsMonitoringSettings = vm_tool_spec cluster_config_spec.dasConfig.defaultVmSettings = das_vm_config cluster_config_spec.dasConfig.admissionControlEnabled = self.ha_admission_control if self.ha_admission_control: if self.params.get('slot_based_admission_control'): cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverLevelAdmissionControlPolicy() policy = self.params.get('slot_based_admission_control') cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = policy.get('failover_level') elif self.params.get('reservation_based_admission_control'): cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverResourcesAdmissionControlPolicy() policy = self.params.get('reservation_based_admission_control') auto_compute_percentages = policy.get('auto_compute_percentages') cluster_config_spec.dasConfig.admissionControlPolicy.autoComputePercentages = auto_compute_percentages cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = policy.get('failover_level') if not auto_compute_percentages: cluster_config_spec.dasConfig.admissionControlPolicy.cpuFailoverResourcesPercent = \ policy.get('cpu_failover_resources_percent') cluster_config_spec.dasConfig.admissionControlPolicy.memoryFailoverResourcesPercent = \ policy.get('memory_failover_resources_percent') elif self.params.get('failover_host_admission_control'): cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverHostAdmissionControlPolicy() policy = self.params.get('failover_host_admission_control') cluster_config_spec.dasConfig.admissionControlPolicy.failoverHosts = self.get_failover_hosts() cluster_config_spec.dasConfig.hostMonitoring = self.params.get('ha_host_monitoring') cluster_config_spec.dasConfig.vmMonitoring = self.params.get('ha_vm_monitoring') try: task = self.cluster.ReconfigureComputeResource_Task(cluster_config_spec, True) changed, result = wait_for_task(task) except vmodl.RuntimeFault as runtime_fault: self.module.fail_json(msg=to_native(runtime_fault.msg)) except vmodl.MethodFault as method_fault: self.module.fail_json(msg=to_native(method_fault.msg)) except TaskError as task_e: self.module.fail_json(msg=to_native(task_e)) except Exception as generic_exc: self.module.fail_json(msg="Failed to update cluster" " due to generic exception %s" % to_native(generic_exc)) else: changed = True self.module.exit_json(changed=changed, result=result) def main(): argument_spec = vmware_argument_spec() argument_spec.update(dict( cluster_name=dict(type='str', required=True), datacenter=dict(type='str', required=True, aliases=['datacenter_name']), # HA enable_ha=dict(type='bool', default=False), ha_host_monitoring=dict(type='str', default='enabled', choices=['enabled', 'disabled']), host_isolation_response=dict(type='str', default='none', choices=['none', 'powerOff', 'shutdown']), # HA VM Monitoring related parameters ha_vm_monitoring=dict(type='str', choices=['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled'], default='vmMonitoringDisabled'), ha_vm_failure_interval=dict(type='int', default=30), ha_vm_min_up_time=dict(type='int', default=120), ha_vm_max_failures=dict(type='int', default=3), ha_vm_max_failure_window=dict(type='int', default=-1), ha_restart_priority=dict(type='str', choices=['high', 'low', 'medium', 'disabled'], default='medium'), # HA Admission Control related parameters slot_based_admission_control=dict(type='dict', options=dict( failover_level=dict(type='int', required=True), )), reservation_based_admission_control=dict(type='dict', options=dict( auto_compute_percentages=dict(type='bool', default=True), failover_level=dict(type='int', required=True), cpu_failover_resources_percent=dict(type='int', default=50), memory_failover_resources_percent=dict(type='int', default=50), )), failover_host_admission_control=dict(type='dict', options=dict( failover_hosts=dict(type='list', elements='str', required=True), )), )) module = AnsibleModule( argument_spec=argument_spec, supports_check_mode=True, mutually_exclusive=[ ['slot_based_admission_control', 'reservation_based_admission_control', 'failover_host_admission_control'] ] ) vmware_cluster_ha = VMwareCluster(module) vmware_cluster_ha.configure_ha() if __name__ == '__main__': main()