I was able to successfully restart the nova-compute service by adding the correct MAC address to the correct IOV ports in the following way.
- find the MAC address of the VM using nova list and neutron port-list commands
[root@n01 ~(keystone_admin)]# nova list | grep centos0
| 75c41eaf-77e1-49ba-bff5-368bedea66ca | centos0 | SHUTOFF | None | Shutdown | net1=10.0.0.2 |
[root@n01 ~(keystone_admin)]# neutron port-list | grep '10.0.0.2'
| 00e377b5-c050-47f5-b3f9-8a807ff2ac7e | | fa:16:3e:62:c2:c3 | {"subnet_id": "a82ea062-53e3-4dc3-8e1e-1524d7dbb2c4", "ip_address": "10.0.0.2"} |
- on the nova-compute host owning the VM, find the IOV VF attached to the VM (in my case it is 0000:01:01.0)
[root@n08 ~]# virsh dumpxml instance-00000014 | grep -A5 '<hostdev'
<hostdev mode='subsystem' type='pci' managed='no'>
<source>
<address domain='0x0000' bus='0x01' slot='0x01' function='0x0'/>
</source>
<alias name='hostdev0'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
- find the GUID index of the VF
[root@n08 ~]# cat /sys/class/infiniband/mlx4_0/iov/0000\:01\:01.0/ports/1/gid_idx/0
8
- add the MAC address of the VM (fa:16:3e:62:c2:c3) to the GUID table at index 8 (the GUID is the MAC address with 0000 inserted in the middle, i.e. fa163e000062c2c3)
[root@n08 ~]# ebrctl write-sys /sys/class/infiniband/mlx4_0/iov/ports/1/admin_guids/8 fa163e000062c2c3
- restart eswitchd (optional?) and neutron-mlnx-agent. The eswitchd logs now show the vnics with the correct MAC addresses:
2014-02-01 15:58:01,460 DEBUG vnics are {'fa:16:3e:e3:8f:98': {'mac': 'fa:16:3e:e3:8f:98', 'device_id': 'd481dfde-0795-4a2e-89ca-3369cb49cbe1'}, 'fa:16:3e:1f:a5:94': {'mac': 'fa:16:3e:1f:a5:94', 'device_id': '29314aa9-6ddc-421b-ad82-5090f8ccaecb'}, 'fa:16:3e:62:c2:c3': {'mac': 'fa:16:3e:62:c2:c3', 'device_id': '75c41eaf-77e1-49ba-bff5-368bedea66ca'}}
2014-02-01 15:58:02,048 DEBUG Handling message - {u'action': u'set_vlan', u'vlan': 1, u'fabric': u'default', u'port_mac': u'fa:16:3e:62:c2:c3'}
2014-02-01 15:58:02,059 DEBUG Running command: sudo eswitch-rootwrap /etc/eswitchd/rootwrap.conf ebrctl write-sys /sys/class/infiniband/mlx4_0/iov/0000:01:01.0/ports/1/pkey_idx/0 1
2014-02-01 15:58:02,228 DEBUG Command: ['sudo', 'eswitch-rootwrap', '/etc/eswitchd/rootwrap.conf', 'ebrctl', 'write-sys', '/sys/class/infiniband/mlx4_0/iov/0000:01:01.0/ports/1/pkey_idx/0', '1']
- from the logs above, eswitchd correctly maps the VF pkey index 0 to the PF pkey index 1 (vlan 1), but it forgets to map the VF pkey index 1 to the default pkey index 0, so set that mapping by hand:
[root@n08 ~]# cat /sys/class/infiniband/mlx4_0/iov/0000\:01\:01.0/ports/1/pkey_idx/{0,1}
1
none
[root@n08 ~]# echo 0 > /sys/class/infiniband/mlx4_0/iov/0000\:01\:01.0/ports/1/pkey_idx/1
[root@n08 ~]# cat /sys/class/infiniband/mlx4_0/iov/0000\:01\:01.0/ports/1/pkey_idx/{0,1}
1
0
- nova-compute service should now be happy to start ;-)
- start your VM
ale