I'll start by saying this is my first InfiniBand setup.
I have two identical RHEL 6.4 servers connected to a Mellanox switch. The connection works, but the transfer speed is far below what I expected. I believe I am being capped at 10-14 Gbps, but I am not sure how to correct this. The Mellanox SX6036 switch shows the cards connected at 54.5 Gbps but only shows a channel of 10.
Infiniband Cards
Model CX353A ConnectX-3 FDR InfiniBand + 40GigE
Infiniband Switch
Mellanox SX6036
Below I have included as much information as I could about the switch and the cards. Any help would be appreciated.
Host Computer
[root@AMAX1 InstallFiles]# rpm -qa | grep MLN
libibmad-devel-1.3.9.MLNX20130522.1e79ec6-0.1.x86_64
opensm-4.1.5.MLNX20140424.25abcb5-0.1.x86_64
infiniband-diags-1.6.2.MLNX20131223.744ec44-0.1.x86_64
libibumad-static-1.3.8.MLNX20130522.da65ddf-0.1.x86_64
opensm-static-4.1.5.MLNX20140424.25abcb5-0.1.x86_64
libibumad-devel-1.3.8.MLNX20130522.da65ddf-0.1.x86_64
libibmad-static-1.3.9.MLNX20130522.1e79ec6-0.1.x86_64
opensm-devel-4.1.5.MLNX20140424.25abcb5-0.1.x86_64
ibutils2-2.1.1-0.43.MLNX20140319.gb85b33c.x86_64
libibumad-1.3.8.MLNX20130522.da65ddf-0.1.x86_64
infiniband-diags-compat-1.6.2.MLNX20131223.744ec44-0.1.x86_64
libibmad-1.3.9.MLNX20130522.1e79ec6-0.1.x86_64
opensm-libs-4.1.5.MLNX20140424.25abcb5-0.1.x86_64
[root@AMAX1 InstallFiles]# rpm -qa | grep OFED
libmlx5-1.0.1mlnx1-OFED.2.2.126.gf42e696.x86_64
libibverbs-devel-1.1.7mlnx1-OFED.2.2.125.gbfab13c.x86_64
libibcm-devel-1.0.5mlnx1-OFED.2.0.0.0.9.20130210.1800.gc8011c5.x86_64
ibacm-1.0.8mlnx4-OFED.2.1.160.gc153ee7.x86_64
dapl-2.0.40mlnx1-OFED.2.2.24.gd52df45.x86_64
knem-mlnx-1.1.1.90mlnx-OFED.2.2.126.g76489bb.rhel6u5.x86_64
kmod-iser-1.2-OFED.2.2.126.gdf6fefb.rhel6u5.x86_64
libibverbs-1.1.7mlnx1-OFED.2.2.125.gbfab13c.x86_64
libmlx4-1.0.5mlnx1-OFED.2.2.126.g453a28d.x86_64
libibcm-1.0.5mlnx1-OFED.2.0.0.0.9.20130210.1800.gc8011c5.x86_64
librdmacm-devel-1.0.17.2mlnx3-OFED.2.2.117.g81abe68.x86_64
dapl-utils-2.0.40mlnx1-OFED.2.2.24.gd52df45.x86_64
srptools-1.0.1-OFED.2.2.105.g247eefa.x86_64
kmod-mlnx-ofa_kernel-2.2-OFED.2.2.1.0.0.1.gdf6fefb.rhel6u5.x86_64
kmod-knem-mlnx-1.1.1.90mlnx-OFED.2.2.126.g76489bb.rhel6u5.x86_64
ofed-scripts-2.2-OFED.2.2.1.0.0.x86_64
libibverbs-utils-1.1.7mlnx1-OFED.2.2.125.gbfab13c.x86_64
libmlx5-devel-1.0.1mlnx1-OFED.2.2.126.gf42e696.x86_64
librdmacm-utils-1.0.17.2mlnx3-OFED.2.2.117.g81abe68.x86_64
dapl-devel-static-2.0.40mlnx1-OFED.2.2.24.gd52df45.x86_64
mlnx-ofa_kernel-devel-2.2-OFED.2.2.1.0.0.1.gdf6fefb.rhel6u5.x86_64
libibverbs-devel-static-1.1.7mlnx1-OFED.2.2.125.gbfab13c.x86_64
librdmacm-1.0.17.2mlnx3-OFED.2.2.117.g81abe68.x86_64
dapl-devel-2.0.40mlnx1-OFED.2.2.24.gd52df45.x86_64
kmod-srp-1.3.2-OFED.2.2.126.gdf6fefb.rhel6u5.x86_64
libmlx4-devel-1.0.5mlnx1-OFED.2.2.126.g453a28d.x86_64
mlnx-ofa_kernel-2.2-OFED.2.2.1.0.0.1.gdf6fefb.rhel6u5.x86_64
[root@AMAX1 sigint]# lspci | grep Net
01:00.0 Network controller: Mellanox Technologies MT27500 Family [ConnectX-3]
05:00.0 Ethernet controller: Intel Corporation 82574L Gigabit Network Connection
06:00.0 Ethernet controller: Intel Corporation 82574L Gigabit Network Connection
82:00.0 Network controller: Mellanox Technologies MT27500 Family [ConnectX-3]
[root@AMAX1 sigint]# ethtool ib0
Settings for ib0:
Supported ports: [ ]
Supported link modes: 1000baseT/Full
1000baseKX/Full
10000baseT/Full
10000baseKX4/Full
10000baseKR/Full
40000baseKR4/Full
40000baseCR4/Full
40000baseSR4/Full
40000baseLR4/Full
Supported pause frame use: No
Supports auto-negotiation: No
Advertised link modes: 1000baseT/Full
1000baseKX/Full
10000baseT/Full
10000baseKX4/Full
10000baseKR/Full
40000baseKR4/Full
40000baseCR4/Full
40000baseSR4/Full
40000baseLR4/Full
Advertised pause frame use: No
Advertised auto-negotiation: No
Speed: 56000Mb/s
Duplex: Full
Port: Other
PHYAD: 1
Transceiver: internal
Auto-negotiation: on
Link detected: yes
[root@AMAX1 sigint]# ethtool -i ib0
driver: ib_ipoib
version: 2.2-1.0.0 (Apr 29 2014)
firmware-version: 2.31.5050
bus-info: 0000:01:00.0
supports-statistics: yes
supports-test: no
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: no
[root@AMAX1 sigint]# ibstat
CA 'mlx4_0'
CA type: MT4099
Number of ports: 1
Firmware version: 2.31.5050
Hardware version: 1
Node GUID: 0x0002c90300193ac0
System image GUID: 0x0002c90300193ac3
Port 1:
State: Active
Physical state: LinkUp
Rate: 56
Base lid: 5
LMC: 0
SM lid: 2
Capability mask: 0x02514868
Port GUID: 0x0002c90300193ac1
Link layer: InfiniBand
SWITCH
Interface ib0 status:
Comment:
Admin up: yes
Link up: yes
IP address:
Netmask:
IPv6 enabled: yes
Autoconf enabled: no
Autoconf route: yes
Autoconf privacy: no
IPv6 addresses: 1
IPv6 address: fe80::202:c903:8e:c300/64
Speed: 10 Gb/sec (4X)
Duplex: full
Interface type: ib
Interface source: physical
MTU: 2044
HW address: 00:00:00:02:fe:80:00:00:00:00:00:00:00:02:c9:03:00:8e:c3:00
RX bytes: 32783954 TX bytes: 68
RX packets: 17367 TX packets: 1
RX mcast packets: 0 TX discards: 5
RX discards: 0 TX errors: 0
RX errors: 0 TX overruns: 0
RX overruns: 0 TX carrier: 0
RX frame: 0 TX collisions: 0
TX queue len: 256
Interface lo status:
Comment:
Admin up: yes
Link up: yes
IP address: 127.0.0.1
Netmask: 255.0.0.0
IPv6 enabled: yes
Autoconf enabled: yes
Autoconf route: yes
Autoconf privacy: no
IPv6 addresses: 1
IPv6 address: ::1/128
Speed: N/A
Duplex: N/A
Interface type: loopback
Interface source: loopback
MTU: 16436
Interface ib0 status:
Comment:
Admin up: yes
Link up: yes
IP address:
Netmask:
IPv6 enabled: yes
Autoconf enabled: no
Autoconf route: yes
Autoconf privacy: no
IPv6 addresses: 1
IPv6 address: fe80::202:c903:8e:c300/64
Speed: 10 Gb/sec (4X)
Duplex: full
Interface type: ib
Interface source: physical
MTU: 2044
HW address: 00:00:00:02:fe:80:00:00:00:00:00:00:00:02:c9:03:00:8e:c3:00
RX bytes: 32783954 TX bytes: 68
RX packets: 17367 TX packets: 1
RX mcast packets: 0 TX discards: 5
RX discards: 0 TX errors: 0
RX errors: 0 TX overruns: 0
RX overruns: 0 TX carrier: 0
RX frame: 0 TX collisions: 0
TX queue len: 256
Interface lo status:
Comment:
Admin up: yes
Link up: yes
IP address: 127.0.0.1
Netmask: 255.0.0.0
IPv6 enabled: yes
Autoconf enabled: yes
Autoconf route: yes
Autoconf privacy: no
IPv6 addresses: 1
IPv6 address: ::1/128
Speed: N/A
Duplex: N/A
Interface type: loopback
Interface source: loopback
MTU: 16436
HW address: N/A
RX bytes: 1342014721 TX bytes: 1342014721
RX packets: 9090498 TX packets: 9090498
RX mcast packets: 0 TX discards: 0
RX discards: 0 TX errors: 0
RX errors: 0 TX overruns: 0
RX overruns: 0 TX carrier: 0
RX frame: 0 TX collisions: 0
TX queue len: 0
Interface mgmt0 status:
Comment:
Admin up: yes
Link up: yes
IP address: 192.168.50.40
Netmask: 224.0.0.0
IPv6 enabled: yes
Autoconf enabled: no
Autoconf route: yes
Autoconf privacy: no
IPv6 addresses: 1
IPv6 address: fe80::202:c9ff:feb7:a008/64
Speed: 1000Mb/s (auto)
Duplex: full (auto)
Interface type: ethernet
Interface source: physical
MTU: 1500
HW address: 00:02:C9:B7:A0:08
RX bytes: 1180979803 TX bytes: 538602673
RX packets: 2941799 TX packets: 2320863
RX mcast packets: 0 TX discards: 0
RX discards: 0 TX errors: 0
RX errors: 0 TX overruns: 0
RX overruns: 0 TX carrier: 0
RX frame: 0 TX collisions: 0
TX queue len: 1000
Interface mgmt1 status:
Comment:
Admin up: yes
Link up: no
IP address:
Netmask:
IPv6 enabled: yes
Autoconf enabled: no
Autoconf route: yes
Autoconf privacy: no
Speed: 10Mb/s (auto)
Duplex: half (auto)
Interface type: ethernet
Interface source: physical
MTU: 1500
HW address: 00:02:C9:B7:A0:09
RX bytes: 0 TX bytes: 0
RX packets: 0 TX packets: 0
RX mcast packets: 0 TX discards: 0
RX discards: 0 TX errors: 0
RX errors: 0 TX overruns: 0
RX overruns: 0 TX carrier: 0
RX frame: 0 TX collisions: 0
TX queue len: 1000
Slot 1 port 3 state
Logical port state : Active
Physical port state : LinkUp
Current line rate : 56.0 Gbps
Supported speeds : 2.5, 5.0, 10.0(FDR10) or 14.0 Gbps rate
Speed : 14.0 Gbps rate
Supported widths : 1X, 4X
Width : 4X
Max supported MTUs : 4096
MTU : 4096
VL capabilities : VL0 - VL7
Operational VLs : VL0 - VL7
Description :
RX bytes : 1566841188
RX packets : 1196589
RX errors : 0
Symbol errors : 0
VL15 dropped packets : 0
TX bytes : 936869748
TX packets : 1411793
TX wait : 0
TX discarded packets : 0
Slot 1 port 5 state
Logical port state : Active
Physical port state : LinkUp
Current line rate : 56.0 Gbps
Supported speeds : 2.5, 5.0, 10.0(FDR10) or 14.0 Gbps rate
Speed : 14.0 Gbps rate
Supported widths : 1X, 4X
Width : 4X
Max supported MTUs : 4096
MTU : 4096
VL capabilities : VL0 - VL7
Operational VLs : VL0 - VL7
Description :
RX bytes : 807193788
RX packets : 961537
RX errors : 0
Symbol errors : 0
VL15 dropped packets : 0
TX bytes : 1436822608
TX packets : 1069741
TX wait : 0
TX discarded packets : 0