crypto: msm: crypto driver performance improvement

To minimize spinlock usage, the qce50 driver assumes that each client
issues only one request to qce50 at a time. After qcrypto issues a
request to qce50, it does not wait for completion; further requests can
still be issued until the maximum number of outstanding requests in
qce50 is reached.
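
Below is a condensed sketch of the lock-free issue path added by this
change (names follow the diff; the flow is illustrative, not a complete
function):

    /* claim the engine without taking a spinlock; spin briefly if the
     * bunch-mode timeout handler currently owns it */
    while (cmpxchg(&pce_dev->owner, QCE_OWNER_NONE, QCE_OWNER_CLIENT)
            != QCE_OWNER_NONE)
        ndelay(40);

    /* count the outstanding request; qce_alloc_req_info() returns -EBUSY
     * once all MAX_QCE_BAM_REQ slots are in use, so callers stop issuing
     * at that limit */
    no_of_queued_req = atomic_inc_return(&pce_dev->no_of_queued_req);

    rc = _qce_sps_transfer(pce_dev, req_info);

    /* release ownership; the completion path, not the issue path,
     * decrements no_of_queued_req */
    cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);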

To cut down the chance of UDP socket receive buffer overflow, the
following schemes are provided:
The number of requests bunched in qce50 is based on the data length of
the current request, which reduces the completion delay for smaller
packets. With a smaller delay, fewer completed requests have to be
processed at a time in the seq_response() completion function.
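
A condensed sketch of the data-length-based cadence in select_mode()
from the diff (req_len >> 7 gives roughly one interrupt per 128 bytes
of bunched data, capped at SET_INTR_AT_REQ = MAX_QCE_BAM_REQ / 2):

    pce_dev->intr_cadence++;
    cadence = (preq_info->req_len >> 7) + 1;   /* small packet => small bunch */
    if (cadence > SET_INTR_AT_REQ)
        cadence = SET_INTR_AT_REQ;

    if (pce_dev->intr_cadence < cadence ||
        (pce_dev->intr_cadence == cadence && pce_dev->cadence_flag)) {
        /* keep bunching: no interrupt requested for this transfer */
        atomic_inc(&pce_dev->bunch_cmd_seq);
    } else {
        /* cadence reached: request an interrupt and restart the count */
        _qce_set_flag(&pce_sps_data->out_transfer, SPS_IOVEC_FLAG_INT);
        pce_dev->intr_cadence = 0;
        atomic_set(&pce_dev->bunch_cmd_seq, 0);
        atomic_set(&pce_dev->last_intr_seq, 0);
        pce_dev->cadence_flag = ~pce_dev->cadence_flag;
    }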

The scheduling of qcrypto requests is changed from least-used to round
robin. This way, requests are distributed more evenly across the
engines. As a result, there is less reordering of completed requests,
and fewer completed requests have to be handled at a time in the
completion callback.
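
Condensed from _avail_eng()/_next_eng() in the diff (loop shape
simplified, variable names as in the patch): scheduling resumes from
the engine after the last one scheduled, rather than rescanning for the
least-used engine:

    /* advance to the next engine in the list, wrapping at the end */
    static inline struct crypto_engine *_next_eng(struct crypto_priv *cp,
            struct crypto_engine *p)
    {
        if (p == NULL || list_is_last(&p->elist, &cp->engine_list))
            return list_first_entry(&cp->engine_list,
                    struct crypto_engine, elist);
        return list_entry(p->elist.next, struct crypto_engine, elist);
    }

    /* round robin: start after the last scheduled engine and take the
     * first one that is not mid-issue and still has free request slots */
    p = _next_eng(cp, cp->scheduled_eng);
    start = p;
    do {
        if (!p->issue_req && atomic_read(&p->req_count) < p->max_req) {
            q = p;
            break;
        }
        p = _next_eng(cp, p);
    } while (p != start);
    cp->scheduled_eng = q;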

Change-Id: I723bac2f9427cddb5248101c9ac3f2b595ad0379
Acked-by: Che-Min Hsieh <cheminh@qti.qualcomm.com>
Signed-off-by: Sivanesan Rajapupathi <srajap@codeaurora.org>
Signed-off-by: Zhen Kong <zkong@codeaurora.org>
Zhen Kong 2016-02-26 09:54:44 -05:00
parent 9e4b3ba4af
commit aa57cd6dbc
3 changed files with 288 additions and 131 deletions

View file

@ -1,6 +1,6 @@
/* Qualcomm Crypto Engine driver.
*
* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved.
* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@ -69,7 +69,7 @@ static LIST_HEAD(qce50_bam_list);
/* Max number of request supported */
#define MAX_QCE_BAM_REQ 8
/* Interrupt flag will be set for every SET_INTR_AT_REQ request */
#define SET_INTR_AT_REQ (MAX_QCE_BAM_REQ - 2)
#define SET_INTR_AT_REQ (MAX_QCE_BAM_REQ / 2)
/* To create extra request space to hold dummy request */
#define MAX_QCE_BAM_REQ_WITH_DUMMY_REQ (MAX_QCE_BAM_REQ + 1)
/* Allocate the memory for MAX_QCE_BAM_REQ + 1 (for dummy request) */
@ -84,6 +84,12 @@ static LIST_HEAD(qce50_bam_list);
/* Index to point the dummy request */
#define DUMMY_REQ_INDEX MAX_QCE_BAM_REQ
enum qce_owner {
QCE_OWNER_NONE = 0,
QCE_OWNER_CLIENT = 1,
QCE_OWNER_TIMEOUT = 2
};
struct dummy_request {
struct qce_sha_req sreq;
uint8_t *in_buf;
@ -133,9 +139,8 @@ struct qce_device {
struct ce_bam_info ce_bam_info;
struct ce_request_info ce_request_info[MAX_QCE_ALLOC_BAM_REQ];
unsigned int ce_request_index;
spinlock_t lock;
spinlock_t sps_lock;
unsigned int no_of_queued_req;
enum qce_owner owner;
atomic_t no_of_queued_req;
struct timer_list timer;
struct dummy_request dummyreq;
unsigned int mode;
@ -144,6 +149,7 @@ struct qce_device {
struct qce_driver_stats qce_stats;
atomic_t bunch_cmd_seq;
atomic_t last_intr_seq;
bool cadence_flag;
};
static void print_notify_debug(struct sps_event_notify *notify);
@ -2539,7 +2545,6 @@ static int _qce_sps_add_cmd(struct qce_device *pce_dev, uint32_t flag,
static int _qce_sps_transfer(struct qce_device *pce_dev, int req_info)
{
int rc = 0;
unsigned long flags;
struct ce_sps_data *pce_sps_data;
pce_sps_data = &pce_dev->ce_request_info[req_info].ce_sps;
@ -2551,7 +2556,6 @@ static int _qce_sps_transfer(struct qce_device *pce_dev, int req_info)
(unsigned int) req_info));
_qce_dump_descr_fifos_dbg(pce_dev, req_info);
spin_lock_irqsave(&pce_dev->sps_lock, flags);
if (pce_sps_data->in_transfer.iovec_count) {
rc = sps_transfer(pce_dev->ce_bam_info.consumer.pipe,
&pce_sps_data->in_transfer);
@ -2570,7 +2574,6 @@ static int _qce_sps_transfer(struct qce_device *pce_dev, int req_info)
ret:
if (rc)
_qce_dump_descr_fifos(pce_dev, req_info);
spin_unlock_irqrestore(&pce_dev->sps_lock, flags);
return rc;
}
@ -2955,23 +2958,20 @@ static inline int qce_alloc_req_info(struct qce_device *pce_dev)
}
}
pr_warn("pcedev %d no reqs available no_of_queued_req %d\n",
pce_dev->dev_no, pce_dev->no_of_queued_req);
pce_dev->dev_no, atomic_read(
&pce_dev->no_of_queued_req));
return -EBUSY;
}
static inline void qce_free_req_info(struct qce_device *pce_dev, int req_info,
bool is_complete)
{
unsigned long flags;
spin_lock_irqsave(&pce_dev->lock, flags);
pce_dev->ce_request_info[req_info].xfer_type = QCE_XFER_TYPE_LAST;
if (xchg(&pce_dev->ce_request_info[req_info].in_use, false) == true) {
if (req_info < MAX_QCE_BAM_REQ && is_complete)
pce_dev->no_of_queued_req--;
atomic_dec(&pce_dev->no_of_queued_req);
} else
pr_warn("request info %d free already\n", req_info);
spin_unlock_irqrestore(&pce_dev->lock, flags);
}
static void print_notify_debug(struct sps_event_notify *notify)
@ -3018,7 +3018,6 @@ static void qce_multireq_timeout(unsigned long data)
{
struct qce_device *pce_dev = (struct qce_device *)data;
int ret = 0;
unsigned long flags;
int last_seq;
last_seq = atomic_read(&pce_dev->bunch_cmd_seq);
@ -3029,27 +3028,29 @@ static void qce_multireq_timeout(unsigned long data)
return;
}
/* last bunch mode command time out */
spin_lock_irqsave(&pce_dev->lock, flags);
if (cmpxchg(&pce_dev->owner, QCE_OWNER_NONE, QCE_OWNER_TIMEOUT)
!= QCE_OWNER_NONE) {
mod_timer(&(pce_dev->timer), (jiffies + DELAY_IN_JIFFIES));
return;
}
del_timer(&(pce_dev->timer));
pce_dev->mode = IN_INTERRUPT_MODE;
pce_dev->qce_stats.no_of_timeouts++;
pr_debug("pcedev %d mode switch to INTR\n", pce_dev->dev_no);
spin_unlock_irqrestore(&pce_dev->lock, flags);
ret = qce_dummy_req(pce_dev);
if (ret)
pr_warn("pcedev %d: Failed to insert dummy req\n",
pce_dev->dev_no);
cmpxchg(&pce_dev->owner, QCE_OWNER_TIMEOUT, QCE_OWNER_NONE);
}
void qce_get_driver_stats(void *handle)
{
unsigned long flags;
struct qce_device *pce_dev = (struct qce_device *) handle;
if (!_qce50_disp_stats)
return;
spin_lock_irqsave(&pce_dev->lock, flags);
pr_info("Engine %d timeout occuured %d\n", pce_dev->dev_no,
pce_dev->qce_stats.no_of_timeouts);
pr_info("Engine %d dummy request inserted %d\n", pce_dev->dev_no,
@ -3059,20 +3060,16 @@ void qce_get_driver_stats(void *handle)
else
pr_info("Engine %d is in INTERRUPT MODE\n", pce_dev->dev_no);
pr_info("Engine %d outstanding request %d\n", pce_dev->dev_no,
pce_dev->no_of_queued_req);
spin_unlock_irqrestore(&pce_dev->lock, flags);
atomic_read(&pce_dev->no_of_queued_req));
}
EXPORT_SYMBOL(qce_get_driver_stats);
void qce_clear_driver_stats(void *handle)
{
unsigned long flags;
struct qce_device *pce_dev = (struct qce_device *) handle;
spin_lock_irqsave(&pce_dev->lock, flags);
pce_dev->qce_stats.no_of_timeouts = 0;
pce_dev->qce_stats.no_of_dummy_reqs = 0;
spin_unlock_irqrestore(&pce_dev->lock, flags);
}
EXPORT_SYMBOL(qce_clear_driver_stats);
@ -3084,7 +3081,6 @@ static void _sps_producer_callback(struct sps_event_notify *notify)
unsigned int req_info;
struct ce_sps_data *pce_sps_data;
struct ce_request_info *preq_info;
unsigned long flags;
print_notify_debug(notify);
@ -3113,10 +3109,8 @@ static void _sps_producer_callback(struct sps_event_notify *notify)
&pce_sps_data->out_transfer);
_qce_set_flag(&pce_sps_data->out_transfer,
SPS_IOVEC_FLAG_INT);
spin_lock_irqsave(&pce_dev->sps_lock, flags);
rc = sps_transfer(pce_dev->ce_bam_info.producer.pipe,
&pce_sps_data->out_transfer);
spin_unlock_irqrestore(&pce_dev->sps_lock, flags);
if (rc) {
pr_err("sps_xfr() fail (producer pipe=0x%lx) rc = %d\n",
(uintptr_t)pce_dev->ce_bam_info.producer.pipe,
@ -4590,18 +4584,27 @@ static int qce_dummy_req(struct qce_device *pce_dev)
static int select_mode(struct qce_device *pce_dev,
struct ce_request_info *preq_info)
{
unsigned long flags;
struct ce_sps_data *pce_sps_data = &preq_info->ce_sps;
unsigned int no_of_queued_req;
unsigned int cadence;
if (!pce_dev->no_get_around) {
_qce_set_flag(&pce_sps_data->out_transfer, SPS_IOVEC_FLAG_INT);
return 0;
}
spin_lock_irqsave(&pce_dev->lock, flags);
pce_dev->no_of_queued_req++;
/*
* claim ownership of device
*/
again:
if (cmpxchg(&pce_dev->owner, QCE_OWNER_NONE, QCE_OWNER_CLIENT)
!= QCE_OWNER_NONE) {
ndelay(40);
goto again;
}
no_of_queued_req = atomic_inc_return(&pce_dev->no_of_queued_req);
if (pce_dev->mode == IN_INTERRUPT_MODE) {
if (pce_dev->no_of_queued_req >= MAX_BUNCH_MODE_REQ) {
if (no_of_queued_req >= MAX_BUNCH_MODE_REQ) {
pce_dev->mode = IN_BUNCH_MODE;
pr_debug("pcedev %d mode switch to BUNCH\n",
pce_dev->dev_no);
@ -4618,17 +4621,21 @@ static int select_mode(struct qce_device *pce_dev,
}
} else {
pce_dev->intr_cadence++;
if (pce_dev->intr_cadence >= SET_INTR_AT_REQ) {
cadence = (preq_info->req_len >> 7) + 1;
if (cadence > SET_INTR_AT_REQ)
cadence = SET_INTR_AT_REQ;
if (pce_dev->intr_cadence < cadence || ((pce_dev->intr_cadence
== cadence) && pce_dev->cadence_flag))
atomic_inc(&pce_dev->bunch_cmd_seq);
else {
_qce_set_flag(&pce_sps_data->out_transfer,
SPS_IOVEC_FLAG_INT);
pce_dev->intr_cadence = 0;
atomic_set(&pce_dev->bunch_cmd_seq, 0);
atomic_set(&pce_dev->last_intr_seq, 0);
} else {
atomic_inc(&pce_dev->bunch_cmd_seq);
pce_dev->cadence_flag = ~pce_dev->cadence_flag;
}
}
spin_unlock_irqrestore(&pce_dev->lock, flags);
return 0;
}
@ -4746,6 +4753,7 @@ static int _qce_aead_ccm_req(void *handle, struct qce_req *q_req)
/* setup xfer type for producer callback handling */
preq_info->xfer_type = QCE_XFER_AEAD;
preq_info->req_len = totallen_in;
_qce_sps_iovec_count_init(pce_dev, req_info);
@ -4804,8 +4812,9 @@ static int _qce_aead_ccm_req(void *handle, struct qce_req *q_req)
_qce_ccm_get_around_output(pce_dev, preq_info, q_req->dir);
select_mode(pce_dev, preq_info);
rc = _qce_sps_transfer(pce_dev, req_info);
cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
}
rc = _qce_sps_transfer(pce_dev, req_info);
if (rc)
goto bad;
return 0;
@ -4973,6 +4982,7 @@ int qce_aead_req(void *handle, struct qce_req *q_req)
/* setup xfer type for producer callback handling */
preq_info->xfer_type = QCE_XFER_AEAD;
preq_info->req_len = totallen;
_qce_sps_iovec_count_init(pce_dev, req_info);
@ -5013,6 +5023,7 @@ int qce_aead_req(void *handle, struct qce_req *q_req)
SPS_IOVEC_FLAG_INT);
pce_sps_data->producer_state = QCE_PIPE_STATE_COMP;
}
rc = _qce_sps_transfer(pce_dev, req_info);
} else {
if (_qce_sps_add_sg_data(pce_dev, areq->src, totallen,
&pce_sps_data->in_transfer))
@ -5040,8 +5051,9 @@ int qce_aead_req(void *handle, struct qce_req *q_req)
pce_sps_data->producer_state = QCE_PIPE_STATE_IDLE;
}
select_mode(pce_dev, preq_info);
rc = _qce_sps_transfer(pce_dev, req_info);
cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
}
rc = _qce_sps_transfer(pce_dev, req_info);
if (rc)
goto bad;
return 0;
@ -5129,6 +5141,7 @@ int qce_ablk_cipher_req(void *handle, struct qce_req *c_req)
/* setup xfer type for producer callback handling */
preq_info->xfer_type = QCE_XFER_CIPHERING;
preq_info->req_len = areq->nbytes;
_qce_sps_iovec_count_init(pce_dev, req_info);
if (pce_dev->support_cmd_dscr)
@ -5160,8 +5173,8 @@ int qce_ablk_cipher_req(void *handle, struct qce_req *c_req)
}
select_mode(pce_dev, preq_info);
rc = _qce_sps_transfer(pce_dev, req_info);
cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
if (rc)
goto bad;
@ -5233,6 +5246,7 @@ int qce_process_sha_req(void *handle, struct qce_sha_req *sreq)
/* setup xfer type for producer callback handling */
preq_info->xfer_type = QCE_XFER_HASHING;
preq_info->req_len = sreq->size;
_qce_sps_iovec_count_init(pce_dev, req_info);
@ -5261,11 +5275,14 @@ int qce_process_sha_req(void *handle, struct qce_sha_req *sreq)
&pce_sps_data->out_transfer))
goto bad;
if (is_dummy)
if (is_dummy) {
_qce_set_flag(&pce_sps_data->out_transfer, SPS_IOVEC_FLAG_INT);
else
rc = _qce_sps_transfer(pce_dev, req_info);
} else {
select_mode(pce_dev, preq_info);
rc = _qce_sps_transfer(pce_dev, req_info);
rc = _qce_sps_transfer(pce_dev, req_info);
cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
}
if (rc)
goto bad;
return 0;
@ -5353,6 +5370,7 @@ int qce_f8_req(void *handle, struct qce_f8_req *req,
/* setup xfer type for producer callback handling */
preq_info->xfer_type = QCE_XFER_F8;
preq_info->req_len = req->data_len;
_qce_sps_iovec_count_init(pce_dev, req_info);
@ -5378,8 +5396,8 @@ int qce_f8_req(void *handle, struct qce_f8_req *req,
&pce_sps_data->out_transfer);
select_mode(pce_dev, preq_info);
rc = _qce_sps_transfer(pce_dev, req_info);
cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
if (rc)
goto bad;
return 0;
@ -5468,6 +5486,7 @@ int qce_f8_multi_pkt_req(void *handle, struct qce_f8_multi_pkt_req *mreq,
/* setup xfer type for producer callback handling */
preq_info->xfer_type = QCE_XFER_F8;
preq_info->req_len = total;
_qce_sps_iovec_count_init(pce_dev, req_info);
@ -5492,8 +5511,8 @@ int qce_f8_multi_pkt_req(void *handle, struct qce_f8_multi_pkt_req *mreq,
&pce_sps_data->out_transfer);
select_mode(pce_dev, preq_info);
rc = _qce_sps_transfer(pce_dev, req_info);
cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
if (rc == 0)
return 0;
@ -5554,6 +5573,7 @@ int qce_f9_req(void *handle, struct qce_f9_req *req, void *cookie,
/* setup xfer type for producer callback handling */
preq_info->xfer_type = QCE_XFER_F9;
preq_info->req_len = req->msize;
_qce_sps_iovec_count_init(pce_dev, req_info);
if (pce_dev->support_cmd_dscr)
@ -5573,8 +5593,8 @@ int qce_f9_req(void *handle, struct qce_f9_req *req, void *cookie,
&pce_sps_data->out_transfer);
select_mode(pce_dev, preq_info);
rc = _qce_sps_transfer(pce_dev, req_info);
cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
if (rc)
goto bad;
return 0;
@ -5939,9 +5959,7 @@ void *qce_open(struct platform_device *pdev, int *rc)
qce_setup_ce_sps_data(pce_dev);
qce_disable_clk(pce_dev);
setup_dummy_req(pce_dev);
spin_lock_init(&pce_dev->lock);
spin_lock_init(&pce_dev->sps_lock);
pce_dev->no_of_queued_req = 0;
atomic_set(&pce_dev->no_of_queued_req, 0);
pce_dev->mode = IN_INTERRUPT_MODE;
init_timer(&(pce_dev->timer));
pce_dev->timer.function = qce_multireq_timeout;
@ -5950,6 +5968,7 @@ void *qce_open(struct platform_device *pdev, int *rc)
pce_dev->intr_cadence = 0;
pce_dev->dev_no = pcedev_no;
pcedev_no++;
pce_dev->owner = QCE_OWNER_NONE;
mutex_unlock(&qce_iomap_mutex);
return pce_dev;
err:

View file

@ -1,4 +1,4 @@
/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved.
/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@ -232,6 +232,7 @@ struct ce_request_info {
dma_addr_t phy_ota_src;
dma_addr_t phy_ota_dst;
unsigned int ota_size;
unsigned int req_len;
};
struct qce_driver_stats {

View file

@ -32,6 +32,7 @@
#include <linux/cache.h>
#include <linux/platform_data/qcom_crypto_device.h>
#include <linux/msm-bus.h>
#include <linux/hardirq.h>
#include <linux/qcrypto.h>
#include <crypto/ctr.h>
@ -51,7 +52,7 @@
#include "qce.h"
#define DEBUG_MAX_FNAME 16
#define DEBUG_MAX_RW_BUF 2048
#define DEBUG_MAX_RW_BUF 4096
#define QCRYPTO_BIG_NUMBER 9999999 /* a big number */
/*
@ -131,6 +132,7 @@ struct qcrypto_req_control {
struct crypto_engine *pce;
struct crypto_async_request *req;
struct qcrypto_resp_ctx *arsp;
int res; /* execution result */
};
struct crypto_engine {
@ -167,8 +169,14 @@ struct crypto_engine {
unsigned int max_req;
struct qcrypto_req_control *preq_pool;
atomic_t req_count;
bool issue_req; /* an request is being issued to qce */
bool first_engine; /* this engine is the first engine or not */
unsigned int irq_cpu; /* the cpu running the irq of this engine */
unsigned int max_req_used; /* debug stats */
};
#define MAX_SMP_CPU 8
struct crypto_priv {
/* CE features supported by target device*/
struct msm_ce_hw_support platform_support;
@ -208,21 +216,37 @@ struct crypto_priv {
enum resp_workq_sts sched_resp_workq_status;
enum req_processing_sts ce_req_proc_sts;
int cpu_getting_irqs_frm_first_ce;
struct crypto_engine *first_engine;
struct crypto_engine *scheduled_eng; /* last engine scheduled */
/* debug stats */
unsigned no_avail;
unsigned resp_stop;
unsigned resp_start;
unsigned max_qlen;
unsigned int queue_work_eng3;
unsigned int queue_work_not_eng3;
unsigned int queue_work_not_eng3_nz;
unsigned int max_resp_qlen;
unsigned int max_reorder_cnt;
unsigned int cpu_req[MAX_SMP_CPU+1];
};
static struct crypto_priv qcrypto_dev;
static struct crypto_engine *_qcrypto_static_assign_engine(
struct crypto_priv *cp);
static struct crypto_engine *_avail_eng(struct crypto_priv *cp);
static struct qcrypto_req_control *qcrypto_alloc_req_control(
struct crypto_engine *pce)
{
int i;
struct qcrypto_req_control *pqcrypto_req_control = pce->preq_pool;
unsigned int req_count;
for (i = 0; i < pce->max_req; i++) {
if (xchg(&pqcrypto_req_control->in_use, true) == false) {
atomic_inc(&pce->req_count);
req_count = atomic_inc_return(&pce->req_count);
if (req_count > pce->max_req_used)
pce->max_req_used = req_count;
return pqcrypto_req_control;
}
pqcrypto_req_control++;
@ -233,11 +257,13 @@ static struct qcrypto_req_control *qcrypto_alloc_req_control(
static void qcrypto_free_req_control(struct crypto_engine *pce,
struct qcrypto_req_control *preq)
{
/* do this before free req */
preq->req = NULL;
preq->arsp = NULL;
/* free req */
if (xchg(&preq->in_use, false) == false) {
pr_warn("request info %p free already\n", preq);
} else {
preq->req = NULL;
preq->arsp = NULL;
atomic_dec(&pce->req_count);
}
}
@ -441,7 +467,9 @@ struct qcrypto_cipher_req_ctx {
#define SHA_MAX_DIGEST_SIZE SHA256_DIGEST_SIZE
#define MSM_QCRYPTO_REQ_QUEUE_LENGTH 768
#define COMPLETION_CB_BACKLOG_LENGTH 768
#define COMPLETION_CB_BACKLOG_LENGTH_STOP 400
#define COMPLETION_CB_BACKLOG_LENGTH_START \
(COMPLETION_CB_BACKLOG_LENGTH_STOP / 2)
static uint8_t _std_init_vector_sha1_uint8[] = {
0x67, 0x45, 0x23, 0x01, 0xEF, 0xCD, 0xAB, 0x89,
@ -1066,6 +1094,7 @@ static int _disp_stats(int id)
unsigned long flags;
struct crypto_priv *cp = &qcrypto_dev;
struct crypto_engine *pe;
int i;
pstat = &_qcrypto_stat;
len = scnprintf(_debug_read_buf, DEBUG_MAX_RW_BUF - 1,
@ -1187,6 +1216,18 @@ static int _disp_stats(int id)
len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
" AHASH operation fail : %llu\n",
pstat->ahash_op_fail);
len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
" resp start, resp stop, max rsp queue reorder-cnt : %u %u %u %u\n",
cp->resp_start, cp->resp_stop,
cp->max_resp_qlen, cp->max_reorder_cnt);
len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
" max queue legnth, no avail : %u %u\n",
cp->max_qlen, cp->no_avail);
len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
" work queue : %u %u %u\n",
cp->queue_work_eng3,
cp->queue_work_not_eng3,
cp->queue_work_not_eng3_nz);
len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
"\n");
spin_lock_irqsave(&cp->lock, flags);
@ -1194,8 +1235,9 @@ static int _disp_stats(int id)
len += scnprintf(
_debug_read_buf + len,
DEBUG_MAX_RW_BUF - len - 1,
" Engine %4d Req : %llu\n",
" Engine %4d Req max %d : %llu\n",
pe->unit,
pe->max_req_used,
pe->total_req
);
len += scnprintf(
@ -1208,6 +1250,14 @@ static int _disp_stats(int id)
qce_get_driver_stats(pe->qce);
}
spin_unlock_irqrestore(&cp->lock, flags);
for (i = 0; i < MAX_SMP_CPU+1; i++)
if (cp->cpu_req[i])
len += scnprintf(
_debug_read_buf + len,
DEBUG_MAX_RW_BUF - len - 1,
"CPU %d Issue Req : %d\n",
i, cp->cpu_req[i]);
return len;
}
@ -1217,13 +1267,25 @@ static void _qcrypto_remove_engine(struct crypto_engine *pengine)
struct qcrypto_alg *q_alg;
struct qcrypto_alg *n;
unsigned long flags;
struct crypto_engine *pe;
cp = pengine->pcp;
spin_lock_irqsave(&cp->lock, flags);
list_del(&pengine->elist);
if (pengine->first_engine) {
cp->first_engine = NULL;
pe = list_first_entry(&cp->engine_list, struct crypto_engine,
elist);
if (pe) {
pe->first_engine = true;
cp->first_engine = pe;
}
}
if (cp->next_engine == pengine)
cp->next_engine = NULL;
if (cp->scheduled_eng == pengine)
cp->scheduled_eng = NULL;
spin_unlock_irqrestore(&cp->lock, flags);
cp->total_units--;
@ -1432,41 +1494,15 @@ static int _qcrypto_setkey_3des(struct crypto_ablkcipher *cipher, const u8 *key,
return 0;
};
static struct crypto_engine *eng_sel_avoid_first(struct crypto_priv *cp)
{
/*
* This function need not be spinlock protected when called from
* the seq_response workq as it will not have any contentions when all
* request processing is stopped.
*/
struct crypto_engine *p;
struct crypto_engine *q = NULL;
int max_user = QCRYPTO_BIG_NUMBER;
int use_cnt;
if (unlikely(list_empty(&cp->engine_list))) {
pr_err("%s: no valid ce to schedule\n", __func__);
return NULL;
}
p = list_first_entry(&cp->engine_list, struct crypto_engine,
elist);
list_for_each_entry_continue(p, &cp->engine_list, elist) {
use_cnt = atomic_read(&p->req_count);
if ((use_cnt < p->max_req) && (use_cnt < max_user)) {
q = p;
max_user = use_cnt;
}
}
return q;
}
static void seq_response(struct work_struct *work)
{
struct crypto_priv *cp = container_of(work, struct crypto_priv,
resp_work);
struct llist_node *list;
struct llist_node *rev = NULL;
struct crypto_engine *pengine;
unsigned long flags;
int total_unit;
again:
list = llist_del_all(&cp->ordered_resp_list);
@ -1485,7 +1521,6 @@ again:
while (rev) {
struct qcrypto_resp_ctx *arsp;
struct crypto_async_request *areq;
struct crypto_engine *pengine;
arsp = container_of(rev, struct qcrypto_resp_ctx, llist);
rev = llist_next(rev);
@ -1495,12 +1530,20 @@ again:
areq->complete(areq, arsp->res);
local_bh_enable();
atomic_dec(&cp->resp_cnt);
if (ACCESS_ONCE(cp->ce_req_proc_sts) == STOPPED &&
atomic_read(&cp->resp_cnt) <=
(COMPLETION_CB_BACKLOG_LENGTH / 2)) {
pengine = eng_sel_avoid_first(cp);
}
if (atomic_read(&cp->resp_cnt) < COMPLETION_CB_BACKLOG_LENGTH_START &&
(cmpxchg(&cp->ce_req_proc_sts, STOPPED, IN_PROGRESS)
== STOPPED)) {
cp->resp_start++;
for (total_unit = cp->total_units; total_unit-- > 0;) {
spin_lock_irqsave(&cp->lock, flags);
pengine = _avail_eng(cp);
spin_unlock_irqrestore(&cp->lock, flags);
if (pengine)
_start_qcrypto_process(cp, pengine);
else
break;
}
}
end:
@ -1512,12 +1555,19 @@ end:
goto end;
}
static void _qcrypto_tfm_complete(struct crypto_priv *cp, u32 type,
void *tfm_ctx)
#define SCHEUDLE_RSP_QLEN_THRESHOLD 64
static void _qcrypto_tfm_complete(struct crypto_engine *pengine, u32 type,
void *tfm_ctx,
struct qcrypto_resp_ctx *cur_arsp,
int res)
{
struct crypto_priv *cp = pengine->pcp;
unsigned long flags;
struct qcrypto_resp_ctx *arsp;
struct list_head *plist;
unsigned int resp_qlen;
unsigned int cnt = 0;
switch (type) {
case CRYPTO_ALG_TYPE_AHASH:
@ -1531,6 +1581,8 @@ static void _qcrypto_tfm_complete(struct crypto_priv *cp, u32 type,
}
spin_lock_irqsave(&cp->lock, flags);
cur_arsp->res = res;
while (!list_empty(plist)) {
arsp = list_first_entry(plist,
struct qcrypto_resp_ctx, list);
@ -1539,16 +1591,51 @@ static void _qcrypto_tfm_complete(struct crypto_priv *cp, u32 type,
else {
list_del(&arsp->list);
llist_add(&arsp->llist, &cp->ordered_resp_list);
atomic_inc(&cp->resp_cnt);
cnt++;
}
}
resp_qlen = atomic_read(&cp->resp_cnt);
if (resp_qlen > cp->max_resp_qlen)
cp->max_resp_qlen = resp_qlen;
if (cnt > cp->max_reorder_cnt)
cp->max_reorder_cnt = cnt;
if ((resp_qlen >= COMPLETION_CB_BACKLOG_LENGTH_STOP) &&
cmpxchg(&cp->ce_req_proc_sts, IN_PROGRESS,
STOPPED) == IN_PROGRESS) {
cp->resp_stop++;
}
spin_unlock_irqrestore(&cp->lock, flags);
retry:
if (!llist_empty(&cp->ordered_resp_list)) {
unsigned int cpu;
if (pengine->first_engine) {
cpu = WORK_CPU_UNBOUND;
cp->queue_work_eng3++;
} else {
cp->queue_work_not_eng3++;
cpu = cp->cpu_getting_irqs_frm_first_ce;
/*
* If source not the first engine, and there
* are outstanding requests going on first engine,
* skip scheduling of work queue to anticipate
* more may be coming. If the response queue
* length exceeds threshold, to avoid further
* delay, schedule work queue immediately.
*/
if (cp->first_engine && atomic_read(
&cp->first_engine->req_count)) {
if (resp_qlen < SCHEUDLE_RSP_QLEN_THRESHOLD)
return;
cp->queue_work_not_eng3_nz++;
}
}
if (cmpxchg(&cp->sched_resp_workq_status, NOT_SCHEDULED,
IS_SCHEDULED) == NOT_SCHEDULED)
queue_work_on(cp->cpu_getting_irqs_frm_first_ce,
cp->resp_wq, &cp->resp_work);
queue_work_on(cpu, cp->resp_wq, &cp->resp_work);
else if (cmpxchg(&cp->sched_resp_workq_status, IS_SCHEDULED,
SCHEDULE_AGAIN) == NOT_SCHEDULED)
goto retry;
@ -1559,36 +1646,34 @@ static void req_done(struct qcrypto_req_control *pqcrypto_req_control)
{
struct crypto_engine *pengine;
struct crypto_async_request *areq;
struct crypto_engine *pe;
struct crypto_priv *cp;
unsigned long flags;
struct qcrypto_resp_ctx *arsp;
u32 type = 0;
void *tfm_ctx = NULL;
unsigned int cpu;
int res;
pengine = pqcrypto_req_control->pce;
cp = pengine->pcp;
spin_lock_irqsave(&cp->lock, flags);
areq = pqcrypto_req_control->req;
arsp = pqcrypto_req_control->arsp;
res = pqcrypto_req_control->res;
qcrypto_free_req_control(pengine, pqcrypto_req_control);
if (areq) {
type = crypto_tfm_alg_type(areq->tfm);
tfm_ctx = crypto_tfm_ctx(areq->tfm);
}
pe = list_first_entry(&cp->engine_list, struct crypto_engine, elist);
if (pe == pengine)
if (cp->cpu_getting_irqs_frm_first_ce != smp_processor_id())
cp->cpu_getting_irqs_frm_first_ce = smp_processor_id();
spin_unlock_irqrestore(&cp->lock, flags);
if (atomic_read(&cp->resp_cnt) <= COMPLETION_CB_BACKLOG_LENGTH) {
cmpxchg(&cp->ce_req_proc_sts, STOPPED, IN_PROGRESS);
_start_qcrypto_process(cp, pengine);
} else
cmpxchg(&cp->ce_req_proc_sts, IN_PROGRESS, STOPPED);
cpu = smp_processor_id();
pengine->irq_cpu = cpu;
if (pengine->first_engine) {
if (cpu != cp->cpu_getting_irqs_frm_first_ce)
cp->cpu_getting_irqs_frm_first_ce = cpu;
}
if (areq)
_qcrypto_tfm_complete(cp, type, tfm_ctx);
_qcrypto_tfm_complete(pengine, type, tfm_ctx, arsp, res);
if (ACCESS_ONCE(cp->ce_req_proc_sts) == IN_PROGRESS)
_start_qcrypto_process(cp, pengine);
}
static void _qce_ahash_complete(void *cookie, unsigned char *digest,
@ -1639,10 +1724,10 @@ static void _qce_ahash_complete(void *cookie, unsigned char *digest,
rctx->first_blk = 0;
if (ret) {
pqcrypto_req_control->arsp->res = -ENXIO;
pqcrypto_req_control->res = -ENXIO;
pstat->ahash_op_fail++;
} else {
pqcrypto_req_control->arsp->res = 0;
pqcrypto_req_control->res = 0;
pstat->ahash_op_success++;
}
if (cp->ce_support.aligned_only) {
@ -1684,10 +1769,10 @@ static void _qce_ablk_cipher_complete(void *cookie, unsigned char *icb,
memcpy(ctx->iv, iv, crypto_ablkcipher_ivsize(ablk));
if (ret) {
pqcrypto_req_control->arsp->res = -ENXIO;
pqcrypto_req_control->res = -ENXIO;
pstat->ablk_cipher_op_fail++;
} else {
pqcrypto_req_control->arsp->res = 0;
pqcrypto_req_control->res = 0;
pstat->ablk_cipher_op_success++;
}
@ -1773,7 +1858,7 @@ static void _qce_aead_complete(void *cookie, unsigned char *icv,
else
pstat->aead_op_success++;
pqcrypto_req_control->arsp->res = ret;
pqcrypto_req_control->res = ret;
req_done(pqcrypto_req_control);
}
@ -2100,12 +2185,24 @@ static int _start_qcrypto_process(struct crypto_priv *cp,
struct aead_request *aead_req;
struct qcrypto_resp_ctx *arsp;
struct qcrypto_req_control *pqcrypto_req_control;
unsigned int cpu = MAX_SMP_CPU;
if (ACCESS_ONCE(cp->ce_req_proc_sts) == STOPPED)
return 0;
if (in_interrupt()) {
cpu = smp_processor_id();
if (cpu >= MAX_SMP_CPU)
cpu = MAX_SMP_CPU - 1;
} else
cpu = MAX_SMP_CPU;
pstat = &_qcrypto_stat;
again:
spin_lock_irqsave(&cp->lock, flags);
if (atomic_read(&pengine->req_count) >= (pengine->max_req)) {
if (pengine->issue_req ||
atomic_read(&pengine->req_count) >= (pengine->max_req)) {
spin_unlock_irqrestore(&cp->lock, flags);
return 0;
}
@ -2176,7 +2273,6 @@ again:
break;
}
atomic_inc(&cp->resp_cnt);
arsp->res = -EINPROGRESS;
arsp->async_req = async_req;
pqcrypto_req_control->pce = pengine;
@ -2185,6 +2281,10 @@ again:
pengine->active_seq++;
pengine->check_flag = true;
pengine->issue_req = true;
cp->cpu_req[cpu]++;
smp_mb(); /* make it visible */
spin_unlock_irqrestore(&cp->lock, flags);
if (backlog_eng)
backlog_eng->complete(backlog_eng, -EINPROGRESS);
@ -2204,9 +2304,12 @@ again:
default:
ret = -EINVAL;
};
pengine->issue_req = false;
smp_mb(); /* make it visible */
pengine->total_req++;
if (ret) {
arsp->res = ret;
pengine->err_req++;
qcrypto_free_req_control(pengine, pqcrypto_req_control);
@ -2218,32 +2321,48 @@ again:
else
pstat->aead_op_fail++;
_qcrypto_tfm_complete(cp, type, tfm_ctx);
_qcrypto_tfm_complete(pengine, type, tfm_ctx, arsp, ret);
goto again;
};
return ret;
}
static inline struct crypto_engine *_next_eng(struct crypto_priv *cp,
struct crypto_engine *p)
{
if (p == NULL || list_is_last(&p->elist, &cp->engine_list))
p = list_first_entry(&cp->engine_list, struct crypto_engine,
elist);
else
p = list_entry(p->elist.next, struct crypto_engine, elist);
return p;
}
static struct crypto_engine *_avail_eng(struct crypto_priv *cp)
{
/* call this function with spinlock set */
struct crypto_engine *p;
struct crypto_engine *q = NULL;
int max_user = QCRYPTO_BIG_NUMBER;
int use_cnt;
struct crypto_engine *p = cp->scheduled_eng;
struct crypto_engine *q1;
int eng_cnt = cp->total_units;
if (unlikely(list_empty(&cp->engine_list))) {
pr_err("%s: no valid ce to schedule\n", __func__);
return NULL;
}
list_for_each_entry(p, &cp->engine_list, elist) {
use_cnt = atomic_read(&p->req_count);
if ((use_cnt < p->max_req) && (use_cnt < max_user)) {
p = _next_eng(cp, p);
q1 = p;
while (eng_cnt-- > 0) {
if (!p->issue_req && atomic_read(&p->req_count) < p->max_req) {
q = p;
max_user = use_cnt;
break;
}
p = _next_eng(cp, p);
if (q1 == p)
break;
}
cp->scheduled_eng = q;
return q;
}
@ -2261,6 +2380,8 @@ static int _qcrypto_queue_req(struct crypto_priv *cp,
} else {
ret = crypto_enqueue_request(&cp->req_queue, req);
pengine = _avail_eng(cp);
if (cp->req_queue.qlen > cp->max_qlen)
cp->max_qlen = cp->req_queue.qlen;
}
if (pengine) {
switch (pengine->bw_state) {
@ -2286,16 +2407,12 @@ static int _qcrypto_queue_req(struct crypto_priv *cp,
pengine = NULL;
break;
}
} else {
cp->no_avail++;
}
spin_unlock_irqrestore(&cp->lock, flags);
if (pengine) {
if (atomic_read(&cp->resp_cnt) <=
COMPLETION_CB_BACKLOG_LENGTH) {
cmpxchg(&cp->ce_req_proc_sts, STOPPED, IN_PROGRESS);
_start_qcrypto_process(cp, pengine);
} else
cmpxchg(&cp->ce_req_proc_sts, IN_PROGRESS, STOPPED);
}
if (pengine && (ACCESS_ONCE(cp->ce_req_proc_sts) == IN_PROGRESS))
_start_qcrypto_process(cp, pengine);
return ret;
}
@ -4762,6 +4879,8 @@ static int _qcrypto_probe(struct platform_device *pdev)
pengine->active_seq = 0;
pengine->last_active_seq = 0;
pengine->check_flag = false;
pengine->max_req_used = 0;
pengine->issue_req = false;
crypto_init_queue(&pengine->req_queue, MSM_QCRYPTO_REQ_QUEUE_LENGTH);
@ -4770,6 +4889,9 @@ static int _qcrypto_probe(struct platform_device *pdev)
pengine->unit = cp->total_units;
spin_lock_irqsave(&cp->lock, flags);
pengine->first_engine = list_empty(&cp->engine_list);
if (pengine->first_engine)
cp->first_engine = pengine;
list_add_tail(&pengine->elist, &cp->engine_list);
cp->next_engine = pengine;
spin_unlock_irqrestore(&cp->lock, flags);
@ -5292,6 +5414,7 @@ static ssize_t _debug_stats_write(struct file *file, const char __user *buf,
unsigned long flags;
struct crypto_priv *cp = &qcrypto_dev;
struct crypto_engine *pe;
int i;
memset((char *)&_qcrypto_stat, 0, sizeof(struct crypto_stat));
spin_lock_irqsave(&cp->lock, flags);
@ -5299,7 +5422,19 @@ static ssize_t _debug_stats_write(struct file *file, const char __user *buf,
pe->total_req = 0;
pe->err_req = 0;
qce_clear_driver_stats(pe->qce);
pe->max_req_used = 0;
}
cp->max_qlen = 0;
cp->resp_start = 0;
cp->resp_stop = 0;
cp->no_avail = 0;
cp->max_resp_qlen = 0;
cp->queue_work_eng3 = 0;
cp->queue_work_not_eng3 = 0;
cp->queue_work_not_eng3_nz = 0;
cp->max_reorder_cnt = 0;
for (i = 0; i < MAX_SMP_CPU + 1; i++)
cp->cpu_req[i] = 0;
spin_unlock_irqrestore(&cp->lock, flags);
return count;
}
@ -5362,6 +5497,8 @@ static int __init _qcrypto_init(void)
pcp->total_units = 0;
pcp->platform_support.bus_scale_table = NULL;
pcp->next_engine = NULL;
pcp->scheduled_eng = NULL;
pcp->ce_req_proc_sts = IN_PROGRESS;
crypto_init_queue(&pcp->req_queue, MSM_QCRYPTO_REQ_QUEUE_LENGTH);
return platform_driver_register(&_qualcomm_crypto);
}