IB/mlx4: Micro-optimize mlx4_ib_poll_one()
Rather than byte-swapping cqe->g_mlpath_rqpn each time we extract a field from it, byte-swap it once into a temporary variable. This results in smaller, better code -- e.g., on 32-bit x86:

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-5 (-5)
function                                     old     new   delta
mlx4_ib_poll_cq                             1188    1183      -5

Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
parent
e57895d389
commit
b3226184af
1 changed file with 5 additions and 4 deletions
|
@@ -313,6 +313,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
|
||||||
struct mlx4_ib_srq *srq;
|
struct mlx4_ib_srq *srq;
|
||||||
int is_send;
|
int is_send;
|
||||||
int is_error;
|
int is_error;
|
||||||
|
u32 g_mlpath_rqpn;
|
||||||
u16 wqe_ctr;
|
u16 wqe_ctr;
|
||||||
|
|
||||||
cqe = next_cqe_sw(cq);
|
cqe = next_cqe_sw(cq);
|
||||||
|
@@ -426,10 +427,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
|
||||||
|
|
||||||
wc->slid = be16_to_cpu(cqe->rlid);
|
wc->slid = be16_to_cpu(cqe->rlid);
|
||||||
wc->sl = cqe->sl >> 4;
|
wc->sl = cqe->sl >> 4;
|
||||||
wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff;
|
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
|
||||||
wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
|
wc->src_qp = g_mlpath_rqpn & 0xffffff;
|
||||||
wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ?
|
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
|
||||||
IB_WC_GRH : 0;
|
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
|
||||||
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
|
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue