diff --git a/.gitignore b/.gitignore index 7b55d73..65eb7ff 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ *.pyc *.swp build +.project +.pydevproject +logs + diff --git a/Makefile b/Makefile index 565db55..5ac18a8 100644 --- a/Makefile +++ b/Makefile @@ -7,10 +7,10 @@ else Q = @ endif -all: eggs $(ROOTFS) check_convention +all: $(ROOTFS) check_convention check_convention: - pep8 py --max-line-length=109 + pep8 py --max-line-length=150 submit: sudo -E solvent submitproduct rootfs $(ROOTFS) @@ -22,20 +22,15 @@ approve: sudo -E solvent approve --product=rootfs clean: - sudo rm -fr build - -eggs: build/master.egg - -build/master.egg: - $(Q)mkdir -p build - PYTHONPATH=py UPSETO_JOIN_PYTHON_NAMESPACES=yes python -m upseto.packegg --entryPoint=py/rackattack/dryrun/master/main.py --output=$@ --createDeps=$@.deps --takeSitePackages --joinPythonNamespaces --include build/master.egg.deps + @sudo rm -fr build + @find -name "*.pyc" -delete + $(ROOTFS): build/smartctl -sudo mv $(ROOTFS) $(ROOTFS).tmp echo "Bringing source" -mkdir $(@D) - sudo -E solvent bring --repositoryBasename=rootfs-basic --product=rootfs --destination=$(ROOTFS).tmp + sudo -E solvent bring --repositoryBasename=rootfs-centos7-basic --product=rootfs --destination=$(ROOTFS).tmp sudo chroot $(ROOTFS).tmp yum install $(RPMS_TO_INSTALL) --assumeyes sudo mkdir $(ROOTFS).tmp/usr/share/inaugurator sudo cp ../inaugurator/build/inaugurator.thin.initrd.img ../inaugurator/build/inaugurator.vmlinuz $(ROOTFS).tmp/usr/share/inaugurator @@ -43,6 +38,12 @@ $(ROOTFS): build/smartctl sudo cp ../inaugurator/dist/inaugurator-1.0-py2.7.egg $(ROOTFS).tmp/tmp sudo chroot $(ROOTFS).tmp easy_install /tmp/inaugurator-1.0-py2.7.egg sudo chroot $(ROOTFS).tmp yum install --assumeyes $(YUMCACHE)/mirrors.kernel.org/fedora-epel/7/x86_64/m/msr-tools-1.3-1.el7.x86_64.rpm + sudo chroot $(ROOTFS).tmp yum install --assumeyes $(YUMCACHE)/mirrors.kernel.org/fedora-epel/7/x86_64/v/vconfig-1.9-16.el7.x86_64.rpm + sudo chroot $(ROOTFS).tmp 
class WaitForPredicate(object):
    """Poll a predicate until it returns a truthy value or a timeout expires.

    timeout and interval are both expressed in seconds.
    """

    def __init__(self, timeout=3, interval=0.1):
        self._timeout = timeout
        self._interval = interval

    def waitAndReturn(self, predicate, *args, **kwargs):
        """Return the first truthy value produced by predicate(*args, **kwargs).

        The predicate is always evaluated at least once (even with a zero
        timeout) and is evaluated one final time when the deadline is reached,
        so a condition that becomes true during the last sleep is not missed.

        Raises Exception when the predicate never returned a truthy value
        within the configured timeout.
        """
        deadline = time.time() + self._timeout
        while True:
            ret = predicate(*args, **kwargs)
            if ret:
                return ret
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            # Never sleep past the deadline: the next iteration is the final check.
            time.sleep(min(self._interval, remaining))
        raise Exception("Predicate '%s' did not happen within timeout" % predicate)
class DryRunHost(object):
    """A host under test that is reached over SSH through the master host.

    Plugins registered in plugins.plugins are instantiated lazily on first
    attribute access (e.g. host.kernel, host.network) and cached afterwards.
    """

    def __init__(self, node, credentials):
        self.name = node.name()
        self.ssh = ProxySSHConnection(node.masterHost, node.ipAddress(), credentials)
        self.__plugins = {}
        self.node = node

    def __getattr__(self, name):
        """Return the plugin registered under name, creating it on first use.

        Raises AttributeError (never KeyError) for unknown names so that
        hasattr(), getattr(obj, name, default) and copy keep working.
        """
        # __getattr__ only runs when normal lookup failed.  Private and dunder
        # names are never plugins; refusing them here also prevents infinite
        # recursion when the plugin cache itself has not been created yet.
        if name.startswith('_'):
            raise AttributeError("'%s' object has no attribute '%s'" % (type(self).__name__, name))
        if name not in self.__plugins:
            try:
                pluginClass = plugins.plugins[name]
            except KeyError:
                raise AttributeError("'%s' object has no attribute or plugin '%s'" % (type(self).__name__, name))
            self.__plugins[name] = pluginClass(self)
        return self.__plugins[name]


class ProxySSHConnection(object):
    """SSH connection to destIp tunnelled through the master host's SSH transport."""

    def __init__(self, masterHost, destIp, credentials):
        self._masterHost = masterHost
        self._destIp = destIp
        # Keyword arguments forwarded verbatim to paramiko's SSHClient.connect().
        self._credentials = credentials
        self._sshClient = None

    @property
    def run(self):
        return run.Run(self._sshClient)

    @property
    def ftp(self):
        return ftp.FTP(self._sshClient)

    @property
    def dirFTP(self):
        return dirftp.DirFTP(self._sshClient)

    def close(self):
        """Close the connection; a no-op when not connected or already closed."""
        client = self._sshClient
        self._sshClient = None
        if client is not None:
            client.close()

    def connect(self):
        """Open a direct-tcpip channel via the master host and authenticate over it."""
        transport = self._masterHost.ssh._sshClient.get_transport()
        dst = (self._destIp, 22)
        src = ('127.0.0.1', 0)
        commChannel = transport.open_channel("direct-tcpip", dst, src)
        self._sshClient = paramiko.client.SSHClient()
        self._sshClient.known_hosts = None
        self._sshClient.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        # The channel is passed as the socket, so the host/port given here are nominal.
        self._sshClient.connect(src[0], port=src[1], sock=commChannel, **self._credentials)
VMXON_BIT) | (1 << LOCK_BIT)) + regvalue = int(host.kernel.rdmsr(IA32_FEATURE_CONTROL)) + result = True + log = '' + if(regvalue & VMX_ENABLED) != VMX_ENABLED: + log = "VMX is not enabled in bios register val %(regvalue)x" % dict(regvalue=int(regvalue)) + result = False + resultObject.addCheck('virt', 'virtualization bios', result, log) + + +def _verifyVirtualizationEnabled(host, resultObject): + info = host.kernel.cpuinfo() + output = '' + result = True + if not info.hasVt(): + result = False + output = "Virtualization is not supported on %(hostname)s cpuninfo %(cpuinfo)s" % dict( + hostname=host.name, cpuinfo=pprint.pprint(info)) + resultObject.addCheck('virt', 'virtualization cpu support', result, output) + if info.hasFlag(0, 'vmx'): + _verifyVmxEnabledByBios(host, resultObject) + + +def _checkDisk(hostToCheck, resultObject): + result, output = hostToCheck.disk.smartctlStatus('/dev/sda') + resultObject.addCheck('disk', 'smartctl /dev/sda', result, output) + resultObject.addCheck('disk', 'SSD /dev/sda', not hostToCheck.disk.rotational('sda')) + + +def _pingScript(ip, deviceName): + return "ping -c 2 %(ip)s -I %(device)s" % dict(ip=ip, device=deviceName) + + +def _runPing(srcHost, dstHost, netName, testResult, lock): + ipDst = dstHost.network.networks[netName]['ip'] + srcDevice = srcHost.network.networks[netName]['device'] + log = '' + pingScript = _pingScript(ipDst, srcDevice) + try: + srcHost.ssh.run.script(pingScript) + lock.acquire() + testResult.addCheck('net', 'ping on %(netName)s from %(src)s to %(dest)s "%(script)s"' % + dict(netName=netName, src=srcHost.name, dest=dstHost.name, script=pingScript), + True, '', (netName, srcHost.name, dstHost.name)) + lock.release() + except: + log = "Failed pinging from host %(srchost)s to %(dstHost)s to ip %(ip)s" % dict( + srchost=srcHost.name, dstHost=dstHost.name, ip=ipDst) + lock.acquire() + testResult.addCheck('net', 'ping on %(netName)s from %(src)s to %(dest)s "%(script)s" exception %(exception)s' % + 
class CpuInfo(dict):
    """Parsed /proc/cpuinfo-style text, keyed by integer processor number.

    Each value is a dict of the "key : value" lines of one processor stanza,
    with keys and values stripped of surrounding whitespace.
    """

    def __init__(self, cpuinfoString):
        dict.__init__(self)
        processor = {}
        for cpuinfoLine in cpuinfoString.split('\n'):
            if not cpuinfoLine.strip():
                # A blank line terminates the current stanza.
                self._storeProcessor(processor)
                processor = {}
                continue
            # Split on the first colon only: values may themselves contain ':'.
            key, separator, value = cpuinfoLine.partition(':')
            if not separator:
                continue
            processor[key.strip()] = value.strip()
        # The input may not end with a blank line; keep the last stanza too.
        self._storeProcessor(processor)

    def _storeProcessor(self, processor):
        """Index a finished stanza; stanzas without a 'processor' key are skipped."""
        if 'processor' in processor:
            self[int(processor['processor'])] = processor

    def hasFlag(self, processorNum, flagName):
        """Return True when flagName is one of the processor's whitespace-separated flags.

        Raises KeyError when processorNum was not present in the parsed text.
        """
        # Compare whole tokens: a substring test would match e.g. 'svm' in 'svm_lock'.
        return flagName in self[processorNum].get('flags', '').split()

    def hasVt(self, processorNum=0):
        """Return True when the processor reports Intel VT-x ('vmx') or AMD-V ('svm')."""
        return self.hasFlag(processorNum, 'vmx') or self.hasFlag(processorNum, 'svm')
def _allocateTestNodes(masterHost, hostsToInnagurate):
    """Inaugurate the given hosts via the master host and connect to the survivors.

    Runs innaugurator.innaugurate on the master host for all host descriptors
    (using the module-level args and label), records an 'innaugurate' check on
    every host's result, and for each host that did not fail creates a
    DryRunHost, stores it under host['host'] and opens its SSH connection.

    Returns the list of host entries that were inaugurated and connected.
    """
    logging.info("Going to innagurate %(servers)d servers...be patient", dict(servers=len(hostsToInnagurate)))

    seedArguments = dict(
        osmosisServerIP=args.osmosisServerIP,
        rootfsLabel=label,
        nodesToInnagurate=[entry['props'] for entry in hostsToInnagurate],
        noClearDisk=args.noClearDisk,
        outputTimeout=30 * 60)
    failedNodes, log = masterHost.seed.runCallable(innaugurator.innaugurate, **seedArguments)
    if len(failedNodes) > 0:
        logging.error("Failed to innagurate %(nodes)d nodes log %(log)s", dict(nodes=len(failedNodes), log=log))

    readyHosts = []
    for entry in hostsToInnagurate:
        hostName = entry['name']
        if hostName in failedNodes:
            # failedNodes maps a host name to the failure details recorded for it.
            entry['result'].addCheck('init', 'innaugurate', False, failedNodes[hostName])
            continue
        entry['result'].addCheck('init', 'innaugurate', True)
        proxiedHost = dryrunhost.DryRunHost(entry['node'], dict(username='root', password='dryrun'))
        entry['host'] = proxiedHost
        proxiedHost.ssh.connect()
        readyHosts.append(entry)

    return readyHosts
def findNetworkCliques(hosts, networksToCheck):
    """Group hosts into cliques of mutual ping connectivity, per network.

    Builds one graph per network name whose nodes are the host names, adds an
    edge for every passed 'ping on' check recorded under host['result']['net'],
    and returns a dict mapping each network name to the list of cliques found
    by networkx.find_cliques.
    """
    import networkx

    template = networkx.Graph()
    template.add_nodes_from([entry['host'].name for entry in hosts])
    graphByNetwork = dict((network, template.copy()) for network in networksToCheck)

    for entry in hosts:
        recordedChecks = entry['result']['net']
        if recordedChecks is None:
            continue
        for check in recordedChecks:
            description = check[0]
            details = check[3]
            if not check[1]:
                continue
            if 'ping on' not in description or details is None:
                continue
            # details is the (network, source host, destination host) triple
            # attached to the ping check.
            network, source, destination = details
            graphByNetwork[network].add_edge(source, destination)

    cliquesByNetwork = {}
    for network, graph in graphByNetwork.items():
        cliquesByNetwork[network] = list(networkx.find_cliques(graph))
    return cliquesByNetwork
printHostsThatFailedInnaguration(failedHosts): + for hostID, log in failedHosts.items(): + logging.error('Host %(host)s failed innauguration serial log %(log)s', dict(host=hostID, log=log)) + + +def _initializeFastNetworkOnHost(host, vtags): + logging.info("Init Fast network in host %(host)s", dict(host=host['name'])) + hostToInitialize = host['host'] + testResult = host['result'] + try: + hostToInitialize.network.initialize() + except: + logging.exception("Failed to initialize network") + pciIdCard = hostToInitialize.network.mellanoxPCIId() + ethtoolResult = hostToInitialize.network.ethtool() + lspciOutput = hostToInitialize.ssh.run.script("lspci") + lsmodOutput = hostToInitialize.ssh.run.script("lsmod") + if pciIdCard is None: + testResult.addCheck('net', 'init fast net', False, "Mellanox Card is not identified lspci %(lspci)s lsmod %(lsmod)s" + % dict(lspci=lspciOutput, lsmod=lsmodOutput)) + elif hostToInitialize.network.fastInterface() is None: + testResult.addCheck('net', 'init fast net ', False, "Link is not connected on Mellanox %(ethtool)s" + % dict(ethtool=pprint.PrettyPrinter(indent=4).pformat(ethtoolResult))) + else: + testResult.addCheck('net', 'init fast net ', False, "Unknown problem lspci %(lspci)s lsmod %(lsmod)s %(ethtool)s" + % dict(lspci=lspciOutput, lsmod=lsmodOutput, ethtool=pprint.PrettyPrinter(indent=4).pformat(ethtoolResult))) + return False + try: + hostToInitialize.network.addTaggedDevices(vtags) + testResult.addCheck('net', 'init fast net', True) + return True + except: + logging.exception("Failed to Add vtags") + ifcfgOutput = hostToInitialize.network.ifconfig() + testResult.addCheck('net', 'init fast net', False, "Failed to add Vports ifcfg %(ifcfg)s" % dict(ifcfg=ifcfgOutput)) + return False + + +def _initializeFastNetworkOnTestHosts(hosts, vtags): + jobs = {host['name']: (_initializeFastNetworkOnHost, host, vtags) for host in hosts} + results = concurrently.run(jobs) + + initializedHosts = [host for host in hosts if 
def _powerOffServers(hosts):
    """Power off every host through IPMI in parallel and record the outcome.

    hosts maps a host id to its results-map entry.  One _powerOffServerViaIPMI
    job is run per host (up to 30 threads); an 'IPMI power off' check is then
    added to each host's result.

    Returns the list of entries whose power-off job reported success.
    """
    jobs = {}
    for name, hostToPowerOff in hosts.items():
        jobs[name] = (_powerOffServerViaIPMI, hostToPowerOff['props'])
    # The results are consumed as a mapping keyed by the same ids as jobs.
    outcomeByHost = concurrently.run(jobs, numberOfThreads=30)

    poweredOff = []
    for hostId, succeeded in outcomeByHost.items():
        entry = hosts[hostId]
        if succeeded:
            entry['result'].addCheck('init', 'IPMI power off', True, '')
            poweredOff.append(entry)
            continue
        entry['result'].addCheck('init', 'IPMI power off', False,
                                 "Failed to connect via IPMI to %s" % entry['props']['ipmiHost'])
    return poweredOff
len(targetNodes) == len(args.ipAddress), "Amount of target nodes must be the same as IP`s %d != %d" % (len(targetNodes), len(args.ipAddress)) + +vtags = args.vlan label = subprocess.check_output(["solvent", "printlabel", "--thisProject", "--product=rootfs"]).strip() -requirements = dict(master=api.Requirement(imageLabel=label, imageHint="rootfs-basic")) -allocation = client.allocate(requirements, allocationInfo) -allocation.wait(timeout=5 * 60) -logging.info("Allocation successful, waiting for ssh") -masterNode = allocation.nodes()['master'] -ssh = connection.Connection(**masterNode.rootSSHCredentials()) -ssh.waitForTCPServer() -ssh.connect() -logging.info("Connected to ssh") -ssh.ftp.putFile("/tmp/master.egg", "build/master.egg") +masterHost = allocateMasterHost(args.rackattackUser, label) +masterHost.network.initialize() +masterHost.network.addTaggedDevices(vtags) +hostsToInnagurate = [] + +for targetNode, ipAddress in zip(targetNodes, args.ipAddress): + ipmiHost = socket.gethostbyname(targetNode['ipmiLogin']['hostname']) + ipmiUsername = targetNode['ipmiLogin']['username'] + ipmiPassword = targetNode['ipmiLogin']['password'] + macAddress = targetNode['primaryMAC'] + hostsToInnagurate.append(dict(hostID=targetNode['id'], + macAddress=macAddress, + ipAddress=ipAddress, + ipmiHost=ipmiHost, + ipmiUsername=ipmiUsername, + ipmiPassword=ipmiPassword)) + +exitCode = -1 +hosts = _createResultsMap(masterHost, hostsToInnagurate) +poweredOnHosts = [] try: - print ssh.run.script( - "PYTHONPATH=/tmp/master.egg " - "strace -fF -o /tmp/trace " - "python -m rackattack.dryrun.master.main " - "--hostID=%(targetNodeID)s --macAddress=%(macAddress)s " - "--ipmiHost=%(ipmiHost)s --ipmiUsername=%(ipmiUsername)s " - "--ipmiPassword=%(ipmiPassword)s --osmosisServerIP=%(osmosisServerIP)s " - "--ipAddress=%(ipAddress)s --label=%(label)s" % dict( - targetNodeID=targetNode['id'], - macAddress=targetNode['primaryMAC'], - ipmiHost=socket.gethostbyname(targetNode['ipmiLogin']['hostname']), - 
ipmiUsername=targetNode['ipmiLogin']['username'], - ipmiPassword=targetNode['ipmiLogin']['password'], - osmosisServerIP=args.osmosisServerIP, - ipAddress=args.ipAddress, - label=label)) + logging.info("Powering hosts off before start") + hostsToProced = _powerOffServers(hosts) + poweredOnHosts = _allocateTestNodesInChunks(masterHost, hostsToProced) + + logging.info('Going to test servers %(names)s', + dict(names=' '.join([innaguratedHost['name'] for innaguratedHost in hostsToProced]))) + + hostsToProced = _initializeFastNetworkOnTestHosts(poweredOnHosts, vtags) + if len(hostsToProced) > 0: + logging.info("Going to check %(servers)d servers", dict(servers=len(hostsToProced))) + healthchecher.checkServers(masterHost, hostsToProced, vtags) + + exitCode = 0 if len([host for host in hosts.values() if not host['result'].passed()]) == 0 else -1 except: - import traceback - traceback.print_exc() - time.sleep(1000000) + logging.exception("Failed running test script") +finally: + try: + _downloadHostsLogs([masterHost] + [host['host'] for host in poweredOnHosts]) + printServerResults(hosts) + analyzeNetworks(poweredOnHosts, vtags) + finally: + if args.debug: + import ipdb + ipdb.set_trace() + _powerOffServers(hosts) + logging.info('PASSED' if exitCode == 0 else 'FAILED') + sys.exit(exitCode) diff --git a/py/rackattack/dryrun/master/main.py b/py/rackattack/dryrun/master/main.py deleted file mode 100644 index d1f709a..0000000 --- a/py/rackattack/dryrun/master/main.py +++ /dev/null @@ -1,88 +0,0 @@ -import logging -logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') -import argparse -import threading -from rackattack.common import tftpboot -from rackattack.common import dnsmasq -from rackattack.common import inaugurate -from rackattack.physical import ipmi -from rackattack.physical import serialoverlan -from rackattack.dryrun.master import network -from rackattack.common import globallock -import time - -parser = 
argparse.ArgumentParser() -parser.add_argument("--hostID", required=True) -parser.add_argument("--macAddress", required=True) -parser.add_argument("--ipmiHost", required=True) -parser.add_argument("--ipmiUsername", required=True) -parser.add_argument("--ipmiPassword", required=True) -parser.add_argument("--osmosisServerIP", required=True) -parser.add_argument("--ipAddress", required=True) -parser.add_argument("--label", required=True) -args = parser.parse_args() - - -checkInEvent = threading.Event() -doneEvent = threading.Event() - - -def inaugurateCheckIn(): - logging.info("Inaugurator checked in") - inaugurateInstance.provideLabel(ipAddress=args.ipAddress, label=args.label) - checkInEvent.set() - - -def inaugurateDone(): - logging.info("Inaugurator done") - doneEvent.set() - - -network.dropFirewall() -logging.info("MyIP: %(ip)s", dict(ip=network.myIP())) - -tftpbootInstance = tftpboot.TFTPBoot( - netmask=network.netmask(), - inauguratorServerIP=network.myIP(), - osmosisServerIP=args.osmosisServerIP, - inauguratorGatewayIP=network.myIP(), - rootPassword="dryrun", - withLocalObjectStore=True) -dnsmasq.DNSMasq.eraseLeasesFile() -dnsmasq.DNSMasq.killAllPrevious() -dnsmasqInstance = dnsmasq.DNSMasq( - tftpboot=tftpbootInstance, - serverIP=network.myIP(), - netmask=network.netmask(), - firstIP=args.ipAddress, - lastIP=args.ipAddress, - gateway=network.gateway(), - nameserver=network.myIP()) -logging.info("Sleeping 1 second to let dnsmasq go up, so it can receive SIGHUP") -time.sleep(1) -logging.info("Done Sleeping 1 second to let dnsmasq go up, so it can receive SIGHUP") -inaugurateInstance = inaugurate.Inaugurate(bindHostname=network.myIP()) -with globallock.lock(): - dnsmasqInstance.add(args.macAddress, args.ipAddress) - inaugurateInstance.register( - ipAddress=args.ipAddress, - checkInCallback=inaugurateCheckIn, - doneCallback=inaugurateDone) - tftpbootInstance.configureForInaugurator(args.macAddress, args.ipAddress, clearDisk=True) -sol = 
serialoverlan.SerialOverLan(args.ipmiHost, args.ipmiUsername, args.ipmiPassword, args.hostID) -ipmiInstance = ipmi.IPMI(args.ipmiHost, args.ipmiUsername, args.ipmiPassword) -ipmiInstance.powerCycle() -try: - logging.info("Waiting for inaugurator to check in") - checkInEvent.wait(6 * 60) - if not checkInEvent.isSet(): - raise Exception("Timeout waiting for inaugurator to checkin") - logging.info("Inaugurator checked in, waiting for inaugurator to complete") - doneEvent.wait(7 * 60) - if not doneEvent.isSet(): - raise Exception("timeout waiting for inaugurator to be done") -except: - logging.info("Serial log was:\n%(log)s", dict(log=open(sol.serialLogFilename()).read())) - raise -finally: - ipmiInstance.off() diff --git a/py/rackattack/dryrun/master/network.py b/py/rackattack/dryrun/master/network.py deleted file mode 100644 index 56adf28..0000000 --- a/py/rackattack/dryrun/master/network.py +++ /dev/null @@ -1,26 +0,0 @@ -import re -import socket -import subprocess - - -def myIP(): - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - try: - s.connect(("1.1.1.1", 1000)) - return s.getsockname()[0] - finally: - s.close() - - -def netmask(): - output = subprocess.check_output(['ifconfig']) - return re.search(r"inet\s+%s\s+netmask\s+(\S+)\s" % myIP(), output).group(1) - - -def gateway(): - output = subprocess.check_output(['ip', 'route', 'show']) - return re.search(r"default\s+via\s+(\S+)\s", output).group(1) - - -def dropFirewall(): - subprocess.check_output(["iptables", "--flush"]) diff --git a/py/rackattack/dryrun/node.py b/py/rackattack/dryrun/node.py new file mode 100644 index 0000000..e50d7c4 --- /dev/null +++ b/py/rackattack/dryrun/node.py @@ -0,0 +1,23 @@ +from rackattack import api + + +class Node(api.Node): + + def __init__(self, name, masterHost, macAddress, ipAddress, nodeId): + self._ipAddress = ipAddress + self.masterHost = masterHost + self._name = name + self.nodeId = nodeId + self._primaryMacAddress = macAddress + + def ipAddress(self): + return 
class Kernel:
    """Host plugin for kernel queries and kernel-module management over SSH."""

    def __init__(self, host):
        self._host = host

    def version(self):
        """Return the output of 'uname -r' on the host."""
        return self._host.ssh.run.script("uname -r")

    def is_debug(self):
        return "debug" in self.version()

    def cpuinfo(self):
        """Return the host's CPU information parsed into a CpuInfo object."""
        return libcpuinfo.CpuInfo(self._host.seed.runCallable(cpuinfo.cpuInfo)[0])

    def rdmsr(self, register):
        """Return the stripped output of 'rdmsr <register>' run on the host."""
        return self._host.ssh.run.script("rdmsr %(regnum)s" % dict(regnum=register)).strip()

    def dmesg(self, lastLines=None):
        """Return the kernel ring buffer, or only its last lastLines lines when given."""
        if lastLines is None:
            return self._host.ssh.run.script("dmesg")
        return self._host.ssh.run.script("dmesg | tail -n %(lines)d" % dict(lines=lastLines))

    def modprobe(self, module, parameters=""):
        """Load a kernel module; on failure log recent dmesg output and re-raise."""
        try:
            self._host.ssh.run.script("modprobe %(module)s %(parameters)s" % dict(module=module, parameters=parameters))
        except Exception:
            self._logDmesgOnModuleLoadFailure(module)
            raise

    def removeKernelModuleIfLoaded(self, module):
        if self.isModuleLoaded(module):
            self.removeKernelModule(module)

    def removeKernelModule(self, module):
        """Run rmmod, retrying for up to 10 seconds; log lsmod and re-raise on failure."""
        TIME_WAIT_FOR_RMMOD_TO_SUCCEEDD = 10
        try:
            waittonotthrow.WaitToNotThrow(timeout=TIME_WAIT_FOR_RMMOD_TO_SUCCEEDD).wait(
                lambda: self._host.ssh.run.script("rmmod %s" % module))
        except Exception:
            logging.exception("Failed to remove module %(module)s lsmod=%(lsmod)s",
                              dict(module=module, lsmod=self._host.ssh.run.script("lsmod")))
            raise

    def isModuleLoaded(self, module):
        """Return True when module appears as a whitespace-separated token in lsmod output."""
        output = self._host.ssh.run.script("lsmod")
        return module in output.split()

    def _logDmesgOnModuleLoadFailure(self, moduleName):
        # We assume that if we fail here it is because of some module dependencies, lets log last lines from dmesg
        try:
            logging.error('Failed to modprobe module %(module)s dmesg: %(dmesg)s',
                          dict(module=moduleName, dmesg=self.dmesg(30)))
        except Exception:
            # Best effort only: the caller re-raises the original modprobe failure.
            logging.error('Failed to modprobe module %(module)s (dmesg could not be collected)',
                          dict(module=moduleName))
class Network(object):
    """Host plugin that brings up the Mellanox "fast" network and its VLAN devices.

    self.networks maps a network name ('untaged' or a VLAN id) to a dict with
    the 'device' name and the 'ip' assigned to it on this host.
    """

    def __init__(self, host):
        self._host = host
        self._mellanixPCIId = None
        self.networks = dict()

    def initialize(self):
        """Load the Mellanox drivers and configure the untagged fast interface."""
        logging.info("Initializing fast network on host %(host)s", dict(host=self._host.name))
        self._initMellanoxDevice()
        self._configureFastNetwork()

    def addTaggedDevice(self, vport, inetAddr):
        """Create VLAN device <untagged device>.<vport> and assign inetAddr to it."""
        untagedDeviceName = self.networks['untaged']['device']
        self._host.ssh.run.script("vconfig add %(deviceName)s %(vlanID)s" % dict(deviceName=untagedDeviceName, vlanID=vport))
        deviceName = "%(device)s.%(port)d" % dict(device=untagedDeviceName, port=vport)
        self._host.seed.runCallable(seednetwork.configureStaticIPOnDevice, inetAddr, deviceName)
        self.networks[vport] = dict(device=deviceName, ip=inetAddr.ip)

    def addTaggedDevices(self, vports):
        """Add one tagged device per VLAN id, each on its own derived /24 network."""
        for i, vport in enumerate(vports):
            self.addTaggedDevice(vport, self._fastNetworkIpAddressFromMgmtIpAddress(i + 1))

    def _fastNetworkIpAddressFromMgmtIpAddress(self, offset=0):
        # The fast address is the management address with its third octet
        # shifted by NETWORK_OFFSET + offset, as a /24 network.
        publicIpList = self._host.node.ipAddress().split('.')
        publicIpList[2] = str(int(publicIpList[2]) + NETWORK_OFFSET + offset)
        newIP = '.'.join(publicIpList)
        netAddress = ipaddr.IPv4Network('%s/%d' % (newIP, 24))
        return netAddress

    def _configureFastNetwork(self):
        try:
            privateInterface = waitforpredicate.WaitForPredicate(timeout=40, interval=3).waitAndReturn(self.fastInterface)
        except:
            interfaces = self._host.seed.runCallable(seednetwork.interfaces)[0]
            logging.exception("Failed to aquire fast interface on host %(host)s existing %(interfaces)s",
                              dict(host=self._host.name, interfaces=interfaces))
            raise

        inet = self._fastNetworkIpAddressFromMgmtIpAddress()
        # Index 0 is used as the device name and index 2 as its MAC address.
        device = privateInterface[0]
        mac = privateInterface[2]
        logging.info("Adding ip address %(ip)s in host %(host)s device %(device)s mac %(mac)s",
                     dict(ip=inet.ip, host=self._host.name, device=device, mac=mac))
        self.networks['untaged'] = dict(device=device, ip=inet.ip)
        staticConfPath = '/etc/sysconfig/network-scripts/ifcfg-%(deviceName)s' % dict(deviceName=device)
        self._host.ssh.ftp.putContents(staticConfPath, SYSCONFIG_NETWORK_CONF %
                                       dict(device=device, ip=inet.ip, mac=mac, mask=inet.netmask))
        self._host.seed.runCallable(seednetwork.configureStaticIPOnDevice, inet, device)

    def _mellanoxPCICardID(self):
        """Return the first field of the Mellanox network/ethernet lspci line, or None."""
        lspciLines = self._host.ssh.run.script("lspci").split('\n')
        for line in lspciLines:
            if 'Mellanox' in line:
                if any(x in line for x in ['Network controller', 'Ethernet controller']):
                    return line.split(' ')[0]
        return None

    def _initMellanoxDevice(self):
        self._host.kernel.removeKernelModuleIfLoaded('mlx4_en')
        self._host.kernel.removeKernelModuleIfLoaded('mlx4_core')
        self._host.kernel.modprobe('mlx4_core', 'port_type_array=2,2')
        self._host.kernel.modprobe('mlx4_en')
        deviceName = waitforpredicate.WaitForPredicate(timeout=30, interval=3).waitAndReturn(self._mellanoxPCICardID)
        self._mellanixPCIId = deviceName
        self._host.kernel.modprobe('8021q')
        self._host.ssh.run.script("/bin/echo eth > /sys/bus/pci/devices/0000:%(deviceName)s/mlx4_port1"
                                  % dict(deviceName=deviceName))
        # Second port is not a must and in fact does not exists in bezeq cloud
        self._host.ssh.run.script("/bin/echo eth > /sys/bus/pci/devices/0000:%(deviceName)s/mlx4_port2 || true"
                                  % dict(deviceName=deviceName))

    def mellanoxPCIId(self):
        """Return the PCI id found by the last driver initialization, or None."""
        return self._mellanixPCIId

    def fastInterface(self):
        """Return the first interface the seed reports as 'fast', or None."""
        interfaces = self._host.seed.runCallable(seednetwork.interfaces)[0]
        if len(interfaces['fast']) > 0:
            return interfaces['fast'][0]
        return None

    def ethtool(self):
        return self._host.seed.runCallable(seednetwork.ethtool)[0]

    def ifconfig(self):
        """Return the output of 'ifconfig -a -v' on the host."""
        # The output must be returned: callers embed it in failure diagnostics.
        return self._host.ssh.run.script("ifconfig -a -v")
a/py/rackattack/dryrun/seeds/innaugurator.py b/py/rackattack/dryrun/seeds/innaugurator.py new file mode 100644 index 0000000..28e569c --- /dev/null +++ b/py/rackattack/dryrun/seeds/innaugurator.py @@ -0,0 +1,180 @@ +import logging +import socket +import functools +import shutil +import datetime +import os +from strato.common.log import configurelogging +configurelogging.configureLogging('innaurugrator') +import argparse +import threading +from rackattack.common import tftpboot +from rackattack.common import dnsmasq +from rackattack.common import inaugurate +from rackattack.physical import ipmi +from rackattack.physical import serialoverlan +from rackattack.common import globallock +import time +import network +from strato.common.multithreading import concurrently +from rackattack.physical import config +config.SERIAL_LOGS_DIRECTORY = '/var/log/rackattack' + + +class Waiter: + + def __init__(self, nodes): + self.nodes = nodes + self.condition = threading.Condition() + + def notifyOne(self, checkedInNode): + self.condition.acquire() + self.nodes = [node for node in self.nodes if node is not checkedInNode] + if len(self.nodes) == 0: + self.condition.notifyAll() + self.condition.release() + + def waitAll(self, timeout=None): + self.condition.acquire() + self.condition.wait(timeout=timeout) + self.condition.release() + return self.nodes + + +def waitForTCPServer(hostname, port, timeout=60, interval=0.1): + before = time.time() + while time.time() - before < timeout: + if _rawTCPConnect((hostname, port)): + return + time.sleep(interval) + raise Exception("SSH TCP Server '%(hostname)s:%(port)s' did not respond within timeout" % dict(hostname=hostname, port=port)) + + +def _rawTCPConnect(tcpEndpoint): + s = socket.socket() + try: + s.connect(tcpEndpoint) + return True + except: + return False + finally: + s.close() + + +def inaugurateCheckIn(inaugurateInstance, innaguratedNode, rootfsLabel, notifier): + logging.info("Inaugurator checked in for node %(ip)s", 
dict(ip=innaguratedNode['ipAddress'])) + inaugurateInstance.provideLabel(ipAddress=innaguratedNode['ipAddress'], label=rootfsLabel) + notifier.notifyOne(innaguratedNode) + + +def inaugurateDone(innaguratedNode, notifier): + logging.info("Inaugurator Done for node %(ip)s", dict(ip=innaguratedNode['ipAddress'])) + notifier.notifyOne(innaguratedNode) + + +def _prepareForInnauguration(dnsmasqInstance, inaugurateInstance, tftpbootInstance, + nodesToInnagurate, rootfsLabel, checkinWaiter, doneWaiter, noClearDisk): + with globallock.lock(): + for nodeToInnaugurate in nodesToInnagurate: + dnsmasqInstance.add(nodeToInnaugurate['macAddress'], nodeToInnaugurate['ipAddress']) + checkInCallback = functools.partial(inaugurateCheckIn, + inaugurateInstance, + nodeToInnaugurate, + rootfsLabel, + checkinWaiter) + doneCallback = functools.partial(inaugurateDone, + innaguratedNode=nodeToInnaugurate, + notifier=doneWaiter) + inaugurateInstance.register(ipAddress=nodeToInnaugurate['ipAddress'], + checkInCallback=checkInCallback, + doneCallback=doneCallback) + tftpbootInstance.configureForInaugurator(nodeToInnaugurate['macAddress'], + nodeToInnaugurate['ipAddress'], + clearDisk=not noClearDisk) + + +def _waitServersToInitializeNetwork(nodesToWaitForIp): + def waitServerToInitNetwork(server): + try: + waitForTCPServer(server['ipAddress'], 22) + return True + except: + logging.exception("Failed to wait for active ssh connection on %(node)s", + dict(node=server['hostID'])) + return False + + jobs = {host['hostID']: (waitServerToInitNetwork, host) for host in nodesToWaitForIp} + results = concurrently.run(jobs) + + return [node for node in nodesToWaitForIp if not results[node['hostID']]] + + +def _powerCycleServer(nodeToInnaugurate): + ipmiInstance = ipmi.IPMI(nodeToInnaugurate['ipmiHost'], + nodeToInnaugurate['ipmiUsername'], + nodeToInnaugurate['ipmiPassword']) + ipmiInstance.forceBootFrom('pxe') + ipmiInstance.powerCycle() + + +def innaugurate(osmosisServerIP, rootfsLabel, 
nodesToInnagurate, noClearDisk): + network.dropFirewall() + logging.info("MyIP: %(ip)s", dict(ip=network.myIP())) + + tftpbootInstance = tftpboot.TFTPBoot( + netmask=network.netmask(), + inauguratorServerIP=network.myIP(), + osmosisServerIP=osmosisServerIP, + inauguratorGatewayIP=network.gateway(), + rootPassword="dryrun", + withLocalObjectStore=True) + dnsmasq.DNSMasq.eraseLeasesFile() + dnsmasq.DNSMasq.killAllPrevious() + dnsmasqInstance = dnsmasq.DNSMasq( + tftpboot=tftpbootInstance, + serverIP=network.myIP(), + netmask=network.netmask(), + ipAddress=nodesToInnagurate[0]['ipAddress'], + gateway=network.gateway(), + nameserver=network.myIP()) + + logging.info("Sleeping 1 second to let dnsmasq go up, so it can receive SIGHUP") + time.sleep(1) + logging.info("Done Sleeping 1 second to let dnsmasq go up, so it can receive SIGHUP") + inaugurateInstance = inaugurate.Inaugurate(bindHostname=network.myIP()) + + checkinWaiters = Waiter(nodesToInnagurate) + doneWaiters = Waiter(nodesToInnagurate) + _prepareForInnauguration(dnsmasqInstance, inaugurateInstance, + tftpbootInstance, nodesToInnagurate, rootfsLabel, + checkinWaiters, doneWaiters, noClearDisk) + solReaders = dict() + for nodeToInnaugurate in nodesToInnagurate: + sol = serialoverlan.SerialOverLan(nodeToInnaugurate['ipmiHost'], + nodeToInnaugurate['ipmiUsername'], + nodeToInnaugurate['ipmiPassword'], + nodeToInnaugurate['hostID']) + solReaders[nodeToInnaugurate['macAddress']] = sol + + jobs = {nodeToInnaugurate['ipmiHost']: (_powerCycleServer, nodeToInnaugurate) for nodeToInnaugurate in nodesToInnagurate} + concurrently.run(jobs) + + logging.info("Waiting for inaugurator to check in") + failedNodesList = [] + failedToCheckinNodes = checkinWaiters.waitAll(timeout=10 * 60) + nodesToInnagurate = [node for node in nodesToInnagurate if node not in failedToCheckinNodes] + logging.error("Failed to checkin nodes %(nodes)s", dict(nodes=failedToCheckinNodes)) + for nodeNotToWaitDone in failedToCheckinNodes: + 
doneWaiters.notifyOne(nodeNotToWaitDone) + notDoneNodes = doneWaiters.waitAll(timeout=15 * 60) + nodesToInnagurate = [node for node in nodesToInnagurate if node not in notDoneNodes] + failedNodesList.extend(failedToCheckinNodes) + failedNodesList.extend(notDoneNodes) + logging.error("Failed to finish nodes %(nodes)s", dict(nodes=notDoneNodes)) + + nodesToWaitForIp = [node for node in nodesToInnagurate if node not in notDoneNodes] + failedNodesList.extend(_waitServersToInitializeNetwork(nodesToWaitForIp)) + failedNodes = {node['hostID']: open(solReaders[node['macAddress']].serialLogFilename()).read() + for node in failedNodesList} + shutil.copy(dnsmasqInstance._logFile.name, '/var/log/rackattack/') + return failedNodes diff --git a/py/rackattack/dryrun/seeds/network.py b/py/rackattack/dryrun/seeds/network.py new file mode 100644 index 0000000..92ca708 --- /dev/null +++ b/py/rackattack/dryrun/seeds/network.py @@ -0,0 +1,61 @@ +import netifaces +import re +import subprocess +import socket + + +def _exec(command): + return subprocess.check_output(command, shell=True, stdin=open('/dev/null'), close_fds=True) + + +def configureStaticIPOnDevice(ip4Network, deviceName): + _exec('ip addr add %(ipmask)s dev %(deviceName)s' % dict(ipmask=ip4Network.with_prefixlen, deviceName=deviceName)) + + +def interfaces(): + nicsBySpeed = dict(slow=[], fast=[]) + # This is copied fron postinstaller + nics = [nic for nic in netifaces.interfaces() if nic.startswith('e') or nic.startswith('p')] + + for nic in nics: + ethtoolOutput = _exec('ethtool %s' % nic).split('\n\t') + speedString = ''.join([ethtoolLine for ethtoolLine in ethtoolOutput if ethtoolLine.startswith('Speed')]) + if not speedString or speedString == 'Speed: Unknown!': + continue + else: + speed = int(re.findall(r'\d+', speedString)[0]) + speedKey = 'fast' if speed > 1000 else 'slow' + macAddress = netifaces.ifaddresses(nic)[netifaces.AF_LINK][0]['addr'] + nicsBySpeed[speedKey].append((nic, speed, macAddress)) + return 
nicsBySpeed + + +def ethtool(): + return {nic: _exec('ethtool %s' % nic) for nic in netifaces.interfaces() + if nic.startswith('e') or nic.startswith('p')} + + +def myIP(): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + try: + s.connect(("1.1.1.1", 1000)) + return s.getsockname()[0] + finally: + s.close() + + +def netmask(): + output = subprocess.check_output(['ifconfig']) + return re.search(r"inet\s+%s\s+netmask\s+(\S+)\s" % myIP(), output).group(1) + + +def gateway(): + output = subprocess.check_output(['ip', 'route', 'show']) + return re.search(r"default\s+via\s+(\S+)\s", output).group(1) + + +def dropFirewall(): + subprocess.check_output(["iptables", "--flush"]) + subprocess.check_output(["iptables", '-t', 'nat', "--flush"]) + subprocess.check_output(["iptables", "--delete-chain"]) + subprocess.check_output(["iptables", '-t', 'nat', "--delete-chain"]) diff --git a/py/rackattack/dryrun/servertestresult.py b/py/rackattack/dryrun/servertestresult.py new file mode 100644 index 0000000..c4bf22c --- /dev/null +++ b/py/rackattack/dryrun/servertestresult.py @@ -0,0 +1,27 @@ +class ServerTestResult(dict): + + def __init__(self, serverId): + self['name'] = serverId + self['status'] = 'SUCCESS' + + def addCheck(self, checkCategory, checkName, checkStatus, checkLog='', extra=None): + categoryEntry = dict.setdefault(self, checkCategory, []) + categoryEntry.append((checkName, checkStatus, checkLog, extra)) + if not checkStatus: + self['status'] = 'FAIL' + + def failedChecks(self): + failedTests = [] + for category, categoryChecks in dict.items(self): + failedTests.extend([(category, check) for check in categoryChecks if check[1] is False]) + return failedTests + + def failedChecksByCategory(self, categoryName): + return [check for check in self[categoryName] if check[1] is False] + + def summary(self): + return {category: len(self.failedChecksByCategory(category)) == 0 + for category in self.keys() if category not in ['name', 'status']} + + def passed(self): + 
return self['status'] == 'SUCCESS' diff --git a/py/strato/__init__.py b/py/strato/__init__.py new file mode 100644 index 0000000..48e952a --- /dev/null +++ b/py/strato/__init__.py @@ -0,0 +1,2 @@ +import upseto.pythonnamespacejoin +__path__.extend(upseto.pythonnamespacejoin.join(globals())) diff --git a/py/strato/common/__init__.py b/py/strato/common/__init__.py new file mode 100644 index 0000000..48e952a --- /dev/null +++ b/py/strato/common/__init__.py @@ -0,0 +1,2 @@ +import upseto.pythonnamespacejoin +__path__.extend(upseto.pythonnamespacejoin.join(globals())) diff --git a/solvent.manifest b/solvent.manifest index 4412c49..420f461 100644 --- a/solvent.manifest +++ b/solvent.manifest @@ -1,5 +1,5 @@ requirements: -- hash: 2c065fef7b3323e4449a36e368486b61d2e25e4f - originURL: https://github.com/Stratoscale/rootfs-basic.git +- hash: 0bdc94ee45ce920dd5454f06b7bdb8cb9607c32b + originURL: https://github.com/Stratoscale/rootfs-centos7-basic.git - hash: ec6af434552d852d7ad217402e5c7393102bb102 originURL: https://github.com/Stratoscale/rootfs-build.git diff --git a/upseto.manifest b/upseto.manifest index 19a5e69..5ffc223 100644 --- a/upseto.manifest +++ b/upseto.manifest @@ -1,5 +1,9 @@ requirements: -- hash: e55359271d8f71f63f7c2cfa08f4ccf2ba0cbd99 +- hash: d5f7438c25437f8635d9485440f9db0516a3a10f + originURL: https://github.com/Stratoscale/pyracktest.git +- hash: ba5959a1a357fbe60db0e46cc4c5808b7ead97cf + originURL: https://github.com/Stratoscale/pycommonmultithreading.git +- hash: 6e98ac3fe49514e5c0b3255602721308f7dfe1f1 originURL: https://github.com/Stratoscale/rackattack-physical.git - hash: 389d66d1af1b7ee78000201b8b7c0defa66dbf6b - originURL: https://github.com/Stratoscale/inaugurator.git + originURL: https://github.com/Stratoscale/inaugurator.git \ No newline at end of file