diff --git a/drivers/crypto/msm/qce50.c b/drivers/crypto/msm/qce50.c
index 6bcc08e94657..61f99370863d 100644
--- a/drivers/crypto/msm/qce50.c
+++ b/drivers/crypto/msm/qce50.c
@@ -1,6 +1,6 @@
 /* Qualcomm Crypto Engine driver.
  *
- * Copyright (c) 2012-2015, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -69,7 +69,7 @@ static LIST_HEAD(qce50_bam_list);
 /* Max number of request supported */
 #define MAX_QCE_BAM_REQ 8
 /* Interrupt flag will be set for every SET_INTR_AT_REQ request */
-#define SET_INTR_AT_REQ (MAX_QCE_BAM_REQ - 2)
+#define SET_INTR_AT_REQ (MAX_QCE_BAM_REQ / 2)
 /* To create extra request space to hold dummy request */
 #define MAX_QCE_BAM_REQ_WITH_DUMMY_REQ (MAX_QCE_BAM_REQ + 1)
 /* Allocate the memory for MAX_QCE_BAM_REQ + 1 (for dummy request) */
@@ -84,6 +84,12 @@ static LIST_HEAD(qce50_bam_list);
 /* Index to point the dummy request */
 #define DUMMY_REQ_INDEX MAX_QCE_BAM_REQ
 
+enum qce_owner {
+	QCE_OWNER_NONE = 0,
+	QCE_OWNER_CLIENT = 1,
+	QCE_OWNER_TIMEOUT = 2
+};
+
 struct dummy_request {
 	struct qce_sha_req sreq;
 	uint8_t *in_buf;
@@ -133,9 +139,8 @@ struct qce_device {
 	struct ce_bam_info ce_bam_info;
 	struct ce_request_info ce_request_info[MAX_QCE_ALLOC_BAM_REQ];
 	unsigned int ce_request_index;
-	spinlock_t lock;
-	spinlock_t sps_lock;
-	unsigned int no_of_queued_req;
+	enum qce_owner owner;
+	atomic_t no_of_queued_req;
 	struct timer_list timer;
 	struct dummy_request dummyreq;
 	unsigned int mode;
@@ -144,6 +149,7 @@ struct qce_device {
 	struct qce_driver_stats qce_stats;
 	atomic_t bunch_cmd_seq;
 	atomic_t last_intr_seq;
+	bool cadence_flag;
 };
 
 static void print_notify_debug(struct sps_event_notify *notify);
@@ -2539,7 +2545,6 @@ static int _qce_sps_add_cmd(struct qce_device *pce_dev, uint32_t flag,
 static int _qce_sps_transfer(struct qce_device *pce_dev, int req_info)
 {
 	int rc = 0;
-	unsigned long flags;
 	struct ce_sps_data *pce_sps_data;
 
 	pce_sps_data = &pce_dev->ce_request_info[req_info].ce_sps;
@@ -2551,7 +2556,6 @@ static int _qce_sps_transfer(struct qce_device *pce_dev, int req_info)
 				(unsigned int) req_info));
 	_qce_dump_descr_fifos_dbg(pce_dev, req_info);
 
-	spin_lock_irqsave(&pce_dev->sps_lock, flags);
 	if (pce_sps_data->in_transfer.iovec_count) {
 		rc = sps_transfer(pce_dev->ce_bam_info.consumer.pipe,
 				&pce_sps_data->in_transfer);
@@ -2570,7 +2574,6 @@ static int _qce_sps_transfer(struct qce_device *pce_dev, int req_info)
 ret:
 	if (rc)
 		_qce_dump_descr_fifos(pce_dev, req_info);
-	spin_unlock_irqrestore(&pce_dev->sps_lock, flags);
 	return rc;
 }
 
@@ -2955,23 +2958,20 @@ static inline int qce_alloc_req_info(struct qce_device *pce_dev)
 		}
 	}
 	pr_warn("pcedev %d no reqs available no_of_queued_req %d\n",
-			pce_dev->dev_no, pce_dev->no_of_queued_req);
+			pce_dev->dev_no, atomic_read(
+				&pce_dev->no_of_queued_req));
 	return -EBUSY;
 }
 
 static inline void qce_free_req_info(struct qce_device *pce_dev, int req_info,
 		bool is_complete)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&pce_dev->lock, flags);
 	pce_dev->ce_request_info[req_info].xfer_type = QCE_XFER_TYPE_LAST;
 	if (xchg(&pce_dev->ce_request_info[req_info].in_use, false) == true) {
 		if (req_info < MAX_QCE_BAM_REQ && is_complete)
-			pce_dev->no_of_queued_req--;
+			atomic_dec(&pce_dev->no_of_queued_req);
 	} else
 		pr_warn("request info %d free already\n", req_info);
-	spin_unlock_irqrestore(&pce_dev->lock, flags);
 }
 
 static void
 print_notify_debug(struct sps_event_notify *notify)
@@ -3018,7 +3018,6 @@ static void qce_multireq_timeout(unsigned long data)
 {
 	struct qce_device *pce_dev = (struct qce_device *)data;
 	int ret = 0;
-	unsigned long flags;
 	int last_seq;
 
 	last_seq = atomic_read(&pce_dev->bunch_cmd_seq);
@@ -3029,27 +3028,29 @@ static void qce_multireq_timeout(unsigned long data)
 		return;
 	}
 	/* last bunch mode command time out */
-	spin_lock_irqsave(&pce_dev->lock, flags);
+	if (cmpxchg(&pce_dev->owner, QCE_OWNER_NONE, QCE_OWNER_TIMEOUT)
+			!= QCE_OWNER_NONE) {
+		mod_timer(&(pce_dev->timer), (jiffies + DELAY_IN_JIFFIES));
+		return;
+	}
 	del_timer(&(pce_dev->timer));
 	pce_dev->mode = IN_INTERRUPT_MODE;
 	pce_dev->qce_stats.no_of_timeouts++;
 	pr_debug("pcedev %d mode switch to INTR\n", pce_dev->dev_no);
-	spin_unlock_irqrestore(&pce_dev->lock, flags);
 	ret = qce_dummy_req(pce_dev);
 	if (ret)
 		pr_warn("pcedev %d: Failed to insert dummy req\n",
 				pce_dev->dev_no);
+	cmpxchg(&pce_dev->owner, QCE_OWNER_TIMEOUT, QCE_OWNER_NONE);
 }
 
 void qce_get_driver_stats(void *handle)
 {
-	unsigned long flags;
 	struct qce_device *pce_dev = (struct qce_device *) handle;
 
	if (!_qce50_disp_stats)
 		return;
-	spin_lock_irqsave(&pce_dev->lock, flags);
 	pr_info("Engine %d timeout occuured %d\n", pce_dev->dev_no,
 			pce_dev->qce_stats.no_of_timeouts);
 	pr_info("Engine %d dummy request inserted %d\n", pce_dev->dev_no,
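The qce_multireq_timeout() hunk above replaces the pce_dev->lock spinlock with a cmpxchg()-based ownership handoff: the timer path may only act if it wins the QCE_OWNER_NONE -> QCE_OWNER_TIMEOUT transition, while the client path (select_mode(), further down) spins on QCE_OWNER_NONE -> QCE_OWNER_CLIENT. Below is a minimal user-space sketch of that protocol — not part of the patch — using C11 atomics in place of the kernel's cmpxchg(); the busy-wait comment stands in for ndelay(40).

#include <stdatomic.h>
#include <stdio.h>

enum qce_owner { QCE_OWNER_NONE = 0, QCE_OWNER_CLIENT = 1, QCE_OWNER_TIMEOUT = 2 };

static _Atomic enum qce_owner owner = QCE_OWNER_NONE;

/* client path (cf. select_mode): spin until the device is unowned */
static void client_claim(void)
{
	enum qce_owner expected;

	do {
		expected = QCE_OWNER_NONE;
		/* kernel: cmpxchg() != QCE_OWNER_NONE -> ndelay(40) and retry */
	} while (!atomic_compare_exchange_strong(&owner, &expected,
						 QCE_OWNER_CLIENT));
}

/* timer path (cf. qce_multireq_timeout): claim only if unowned, else back off */
static int timeout_try_claim(void)
{
	enum qce_owner expected = QCE_OWNER_NONE;

	return atomic_compare_exchange_strong(&owner, &expected,
					      QCE_OWNER_TIMEOUT);
}

/* release is also a cmpxchg: a no-op if we are not the current owner */
static void release(enum qce_owner me)
{
	enum qce_owner expected = me;

	atomic_compare_exchange_strong(&owner, &expected, QCE_OWNER_NONE);
}

int main(void)
{
	client_claim();			/* ... issue request, sps_transfer() ... */
	release(QCE_OWNER_CLIENT);

	if (timeout_try_claim()) {	/* ... switch mode, queue dummy req ... */
		release(QCE_OWNER_TIMEOUT);
	} else {
		puts("busy: re-arm the timer and retry later");
	}
	return 0;
}

The timeout path deliberately does not spin: if a client owns the device, it re-arms the timer and tries again, which is why the patch adds the mod_timer()-and-return branch.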
@@ -3059,20 +3060,16 @@ void qce_get_driver_stats(void *handle)
 	else
 		pr_info("Engine %d is in INTERRUPT MODE\n", pce_dev->dev_no);
 	pr_info("Engine %d outstanding request %d\n", pce_dev->dev_no,
-			pce_dev->no_of_queued_req);
-	spin_unlock_irqrestore(&pce_dev->lock, flags);
+			atomic_read(&pce_dev->no_of_queued_req));
 }
 EXPORT_SYMBOL(qce_get_driver_stats);
 
 void qce_clear_driver_stats(void *handle)
 {
-	unsigned long flags;
 	struct qce_device *pce_dev = (struct qce_device *) handle;
 
-	spin_lock_irqsave(&pce_dev->lock, flags);
 	pce_dev->qce_stats.no_of_timeouts = 0;
 	pce_dev->qce_stats.no_of_dummy_reqs = 0;
-	spin_unlock_irqrestore(&pce_dev->lock, flags);
 }
 EXPORT_SYMBOL(qce_clear_driver_stats);
 
@@ -3084,7 +3081,6 @@ static void _sps_producer_callback(struct sps_event_notify *notify)
 	unsigned int req_info;
 	struct ce_sps_data *pce_sps_data;
 	struct ce_request_info *preq_info;
-	unsigned long flags;
 
 	print_notify_debug(notify);
 
@@ -3113,10 +3109,8 @@ static void _sps_producer_callback(struct sps_event_notify *notify)
 			&pce_sps_data->out_transfer);
 		_qce_set_flag(&pce_sps_data->out_transfer,
 					SPS_IOVEC_FLAG_INT);
-		spin_lock_irqsave(&pce_dev->sps_lock, flags);
 		rc = sps_transfer(pce_dev->ce_bam_info.producer.pipe,
 					&pce_sps_data->out_transfer);
-		spin_unlock_irqrestore(&pce_dev->sps_lock, flags);
 		if (rc) {
 			pr_err("sps_xfr() fail (producer pipe=0x%lx) rc = %d\n",
 				(uintptr_t)pce_dev->ce_bam_info.producer.pipe,
@@ -4590,18 +4584,27 @@ static int qce_dummy_req(struct qce_device *pce_dev)
 static int select_mode(struct qce_device *pce_dev,
 		struct ce_request_info *preq_info)
 {
-	unsigned long flags;
 	struct ce_sps_data *pce_sps_data = &preq_info->ce_sps;
+	unsigned int no_of_queued_req;
+	unsigned int cadence;
 
 	if (!pce_dev->no_get_around) {
 		_qce_set_flag(&pce_sps_data->out_transfer,
					SPS_IOVEC_FLAG_INT);
 		return 0;
 	}
-	spin_lock_irqsave(&pce_dev->lock, flags);
-	pce_dev->no_of_queued_req++;
+	/*
+	 * claim ownership of device
+	 */
+again:
+	if (cmpxchg(&pce_dev->owner, QCE_OWNER_NONE, QCE_OWNER_CLIENT)
+			!= QCE_OWNER_NONE) {
+		ndelay(40);
+		goto again;
+	}
+	no_of_queued_req = atomic_inc_return(&pce_dev->no_of_queued_req);
 	if (pce_dev->mode == IN_INTERRUPT_MODE) {
-		if (pce_dev->no_of_queued_req >= MAX_BUNCH_MODE_REQ) {
+		if (no_of_queued_req >= MAX_BUNCH_MODE_REQ) {
 			pce_dev->mode = IN_BUNCH_MODE;
 			pr_debug("pcedev %d mode switch to BUNCH\n",
 					pce_dev->dev_no);
@@ -4618,17 +4621,21 @@ static int select_mode(struct qce_device *pce_dev,
 		}
 	} else {
 		pce_dev->intr_cadence++;
-		if (pce_dev->intr_cadence >= SET_INTR_AT_REQ) {
+		cadence = (preq_info->req_len >> 7) + 1;
+		if (cadence > SET_INTR_AT_REQ)
+			cadence = SET_INTR_AT_REQ;
+		if (pce_dev->intr_cadence < cadence || ((pce_dev->intr_cadence
+				== cadence) && pce_dev->cadence_flag))
+			atomic_inc(&pce_dev->bunch_cmd_seq);
+		else {
 			_qce_set_flag(&pce_sps_data->out_transfer,
 					SPS_IOVEC_FLAG_INT);
 			pce_dev->intr_cadence = 0;
 			atomic_set(&pce_dev->bunch_cmd_seq, 0);
 			atomic_set(&pce_dev->last_intr_seq, 0);
-		} else {
-			atomic_inc(&pce_dev->bunch_cmd_seq);
+			pce_dev->cadence_flag = ~pce_dev->cadence_flag;
 		}
 	}
-	spin_unlock_irqrestore(&pce_dev->lock, flags);
 	return 0;
 }
 
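The cadence hunk above makes the interrupt rate depend on request size instead of a fixed count: in bunch mode, one request is flagged for an interrupt roughly every (req_len / 128) + 1 requests, capped at the new SET_INTR_AT_REQ. A standalone restatement of that arithmetic (illustrative only; the sample lengths are arbitrary):

#include <stdio.h>

#define MAX_QCE_BAM_REQ		8
#define SET_INTR_AT_REQ		(MAX_QCE_BAM_REQ / 2)	/* 4; was MAX_QCE_BAM_REQ - 2 */

/* one interrupt roughly every (req_len / 128) + 1 bunched requests, capped */
static unsigned int intr_cadence_for(unsigned int req_len)
{
	unsigned int cadence = (req_len >> 7) + 1;	/* 128-byte units */

	return cadence > SET_INTR_AT_REQ ? SET_INTR_AT_REQ : cadence;
}

int main(void)
{
	unsigned int lens[] = { 64, 256, 1024, 65536 };

	for (int i = 0; i < 4; i++)
		printf("req_len %6u -> interrupt every %u requests\n",
		       lens[i], intr_cadence_for(lens[i]));
	return 0;
}

Small requests keep interrupts frequent (cadence 1), while large requests amortize one interrupt over up to SET_INTR_AT_REQ requests; this is why the patch threads the new preq_info->req_len through every request path below.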
@@ -4746,6 +4753,7 @@ static int _qce_aead_ccm_req(void *handle, struct qce_req *q_req)
 
 	/* setup xfer type for producer callback handling */
 	preq_info->xfer_type = QCE_XFER_AEAD;
+	preq_info->req_len = totallen_in;
 
 	_qce_sps_iovec_count_init(pce_dev, req_info);
@@ -4804,8 +4812,9 @@ static int _qce_aead_ccm_req(void *handle, struct qce_req *q_req)
 		_qce_ccm_get_around_output(pce_dev, preq_info, q_req->dir);
 
 		select_mode(pce_dev, preq_info);
+		rc = _qce_sps_transfer(pce_dev, req_info);
+		cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
 	}
-	rc = _qce_sps_transfer(pce_dev, req_info);
 	if (rc)
 		goto bad;
 	return 0;
@@ -4973,6 +4982,7 @@ int qce_aead_req(void *handle, struct qce_req *q_req)
 
 	/* setup xfer type for producer callback handling */
 	preq_info->xfer_type = QCE_XFER_AEAD;
+	preq_info->req_len = totallen;
 
 	_qce_sps_iovec_count_init(pce_dev, req_info);
@@ -5013,6 +5023,7 @@ int qce_aead_req(void *handle, struct qce_req *q_req)
 					SPS_IOVEC_FLAG_INT);
 			pce_sps_data->producer_state = QCE_PIPE_STATE_COMP;
 		}
+		rc = _qce_sps_transfer(pce_dev, req_info);
 	} else {
 		if (_qce_sps_add_sg_data(pce_dev, areq->src, totallen,
 					&pce_sps_data->in_transfer))
 			goto bad;
@@ -5040,8 +5051,9 @@ int qce_aead_req(void *handle, struct qce_req *q_req)
 			pce_sps_data->producer_state = QCE_PIPE_STATE_IDLE;
 		}
 		select_mode(pce_dev, preq_info);
+		rc = _qce_sps_transfer(pce_dev, req_info);
+		cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
 	}
-	rc = _qce_sps_transfer(pce_dev, req_info);
 	if (rc)
 		goto bad;
 	return 0;
@@ -5129,6 +5141,7 @@ int qce_ablk_cipher_req(void *handle, struct qce_req *c_req)
 
 	/* setup xfer type for producer callback handling */
 	preq_info->xfer_type = QCE_XFER_CIPHERING;
+	preq_info->req_len = areq->nbytes;
 
 	_qce_sps_iovec_count_init(pce_dev, req_info);
 	if (pce_dev->support_cmd_dscr)
@@ -5160,8 +5173,8 @@ int qce_ablk_cipher_req(void *handle, struct qce_req *c_req)
 	}
 
 	select_mode(pce_dev, preq_info);
-
 	rc = _qce_sps_transfer(pce_dev, req_info);
+	cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
 	if (rc)
 		goto bad;
 
@@ -5233,6 +5246,7 @@ int qce_process_sha_req(void *handle, struct qce_sha_req *sreq)
 
 	/* setup xfer type for producer callback handling */
 	preq_info->xfer_type = QCE_XFER_HASHING;
+	preq_info->req_len = sreq->size;
 
 	_qce_sps_iovec_count_init(pce_dev, req_info);
@@ -5261,11 +5275,14 @@ int qce_process_sha_req(void *handle, struct qce_sha_req *sreq)
 				&pce_sps_data->out_transfer))
 		goto bad;
-	if (is_dummy)
+	if (is_dummy) {
 		_qce_set_flag(&pce_sps_data->out_transfer,
 					SPS_IOVEC_FLAG_INT);
-	else
+		rc = _qce_sps_transfer(pce_dev, req_info);
+	} else {
 		select_mode(pce_dev, preq_info);
-	rc = _qce_sps_transfer(pce_dev, req_info);
+		rc = _qce_sps_transfer(pce_dev, req_info);
+		cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
+	}
 	if (rc)
 		goto bad;
 	return 0;
@@ -5353,6 +5370,7 @@ int qce_f8_req(void *handle, struct qce_f8_req *req,
 
 	/* setup xfer type for producer callback handling */
 	preq_info->xfer_type = QCE_XFER_F8;
+	preq_info->req_len = req->data_len;
 
 	_qce_sps_iovec_count_init(pce_dev, req_info);
@@ -5378,8 +5396,8 @@ int qce_f8_req(void *handle, struct qce_f8_req *req,
 			&pce_sps_data->out_transfer);
 
 	select_mode(pce_dev, preq_info);
-
 	rc = _qce_sps_transfer(pce_dev, req_info);
+	cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
 	if (rc)
 		goto bad;
 	return 0;
@@ -5468,6 +5486,7 @@ int qce_f8_multi_pkt_req(void *handle, struct qce_f8_multi_pkt_req *mreq,
 
 	/* setup xfer type for producer callback handling */
 	preq_info->xfer_type = QCE_XFER_F8;
+	preq_info->req_len = total;
 
 	_qce_sps_iovec_count_init(pce_dev, req_info);
@@ -5492,8 +5511,8 @@ int qce_f8_multi_pkt_req(void *handle, struct qce_f8_multi_pkt_req *mreq,
 			&pce_sps_data->out_transfer);
 
 	select_mode(pce_dev, preq_info);
-
 	rc = _qce_sps_transfer(pce_dev, req_info);
+	cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
 	if (rc == 0)
 		return 0;
 
@@ -5554,6 +5573,7 @@ int qce_f9_req(void *handle, struct qce_f9_req *req, void *cookie,
 
 	/* setup xfer type for producer callback handling */
 	preq_info->xfer_type = QCE_XFER_F9;
+	preq_info->req_len = req->msize;
 
 	_qce_sps_iovec_count_init(pce_dev, req_info);
 	if (pce_dev->support_cmd_dscr)
@@ -5573,8 +5593,8 @@ int qce_f9_req(void *handle, struct qce_f9_req *req, void *cookie,
 			&pce_sps_data->out_transfer);
 
 	select_mode(pce_dev, preq_info);
-
 	rc = _qce_sps_transfer(pce_dev, req_info);
+	cmpxchg(&pce_dev->owner, QCE_OWNER_CLIENT, QCE_OWNER_NONE);
 	if (rc)
 		goto bad;
 	return 0;
@@ -5939,9 +5959,7 @@ void *qce_open(struct platform_device *pdev, int *rc)
 	qce_setup_ce_sps_data(pce_dev);
 	qce_disable_clk(pce_dev);
 	setup_dummy_req(pce_dev);
-	spin_lock_init(&pce_dev->lock);
-	spin_lock_init(&pce_dev->sps_lock);
-	pce_dev->no_of_queued_req = 0;
+	atomic_set(&pce_dev->no_of_queued_req, 0);
 	pce_dev->mode = IN_INTERRUPT_MODE;
 	init_timer(&(pce_dev->timer));
 	pce_dev->timer.function = qce_multireq_timeout;
@@ -5950,6 +5968,7 @@ void *qce_open(struct platform_device *pdev, int *rc)
 	pce_dev->intr_cadence = 0;
 	pce_dev->dev_no = pcedev_no;
 	pcedev_no++;
+	pce_dev->owner = QCE_OWNER_NONE;
 	mutex_unlock(&qce_iomap_mutex);
 	return pce_dev;
 err:
diff --git a/drivers/crypto/msm/qce50.h b/drivers/crypto/msm/qce50.h
index cef466382ee4..6dba3664ff08 100644
--- a/drivers/crypto/msm/qce50.h
+++ b/drivers/crypto/msm/qce50.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -232,6 +232,7 @@ struct ce_request_info {
 	dma_addr_t phy_ota_src;
 	dma_addr_t phy_ota_dst;
 	unsigned int ota_size;
+	unsigned int req_len;
 };
 
 struct qce_driver_stats {
diff --git a/drivers/crypto/msm/qcrypto.c b/drivers/crypto/msm/qcrypto.c
index ab026d24e978..faeff0b55202 100644
--- a/drivers/crypto/msm/qcrypto.c
+++ b/drivers/crypto/msm/qcrypto.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -51,7 +52,7 @@
 #include "qce.h"
 
 #define DEBUG_MAX_FNAME 16
-#define DEBUG_MAX_RW_BUF 2048
+#define DEBUG_MAX_RW_BUF 4096
 #define QCRYPTO_BIG_NUMBER 9999999 /* a big number */
 
 /*
@@ -131,6 +132,7 @@ struct qcrypto_req_control {
 	struct crypto_engine *pce;
 	struct crypto_async_request *req;
 	struct qcrypto_resp_ctx *arsp;
+	int res; /* execution result */
 };
 
 struct crypto_engine {
@@ -167,8 +169,14 @@ struct crypto_engine {
 	unsigned int max_req;
 	struct qcrypto_req_control *preq_pool;
 	atomic_t req_count;
+	bool issue_req;		/* an request is being issued to qce */
+	bool first_engine;	/* this engine is the first engine or not */
+	unsigned int irq_cpu;	/* the cpu running the irq of this engine */
+	unsigned int max_req_used; /* debug stats */
 };
 
+#define MAX_SMP_CPU	8
+
 struct crypto_priv {
 	/* CE features supported by target device*/
 	struct msm_ce_hw_support platform_support;
@@ -208,21 +216,37 @@ struct crypto_priv {
 	enum resp_workq_sts sched_resp_workq_status;
 	enum req_processing_sts ce_req_proc_sts;
 	int cpu_getting_irqs_frm_first_ce;
+	struct crypto_engine *first_engine;
+	struct crypto_engine *scheduled_eng; /* last engine scheduled */
+
+	/* debug stats */
+	unsigned no_avail;
+	unsigned resp_stop;
+	unsigned resp_start;
+	unsigned max_qlen;
+	unsigned int queue_work_eng3;
+	unsigned int queue_work_not_eng3;
+	unsigned int queue_work_not_eng3_nz;
+	unsigned int max_resp_qlen;
+	unsigned int max_reorder_cnt;
+	unsigned int cpu_req[MAX_SMP_CPU+1];
 };
 
 static struct crypto_priv qcrypto_dev;
 static struct crypto_engine *_qcrypto_static_assign_engine(
 		struct crypto_priv *cp);
 static struct crypto_engine *_avail_eng(struct crypto_priv *cp);
-
 static struct qcrypto_req_control *qcrypto_alloc_req_control(
 		struct crypto_engine *pce)
 {
 	int i;
 	struct qcrypto_req_control *pqcrypto_req_control = pce->preq_pool;
+	unsigned int req_count;
 
 	for (i = 0; i < pce->max_req; i++) {
 		if (xchg(&pqcrypto_req_control->in_use, true) == false) {
-			atomic_inc(&pce->req_count);
+			req_count = atomic_inc_return(&pce->req_count);
+			if (req_count > pce->max_req_used)
+				pce->max_req_used = req_count;
 			return pqcrypto_req_control;
 		}
 		pqcrypto_req_control++;
@@ -233,11 +257,13 @@ static struct qcrypto_req_control *qcrypto_alloc_req_control(
 static void qcrypto_free_req_control(struct crypto_engine *pce,
 					struct qcrypto_req_control *preq)
 {
+	/* do this before free req */
+	preq->req = NULL;
+	preq->arsp = NULL;
+	/* free req */
 	if (xchg(&preq->in_use, false) == false) {
 		pr_warn("request info %p free already\n", preq);
 	} else {
-		preq->req = NULL;
-		preq->arsp = NULL;
 		atomic_dec(&pce->req_count);
 	}
 }
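The alloc/free hunks above claim and release pool slots with xchg() on an in_use flag rather than a lock, and the patch now clears req/arsp before the slot is released so a concurrent allocator can never observe stale pointers in a slot it just won. A user-space sketch of the same idea with C11 atomic_exchange (structure and names are simplified, not the driver's types):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_REQ 8

struct req_control {
	atomic_bool in_use;
	void *req;	/* must be cleared before the slot is released */
};

static struct req_control pool[MAX_REQ];

static struct req_control *alloc_req_control(void)
{
	for (int i = 0; i < MAX_REQ; i++) {
		/* only the winner of the exchange gets the slot */
		if (atomic_exchange(&pool[i].in_use, true) == false)
			return &pool[i];
	}
	return NULL;	/* pool exhausted */
}

static void free_req_control(struct req_control *preq)
{
	/* do this before freeing: no stale pointer for the next owner */
	preq->req = NULL;
	if (atomic_exchange(&preq->in_use, false) == false)
		printf("slot %p freed already\n", (void *)preq);
}

int main(void)
{
	struct req_control *r = alloc_req_control();

	printf("got slot %p\n", (void *)r);
	free_req_control(r);
	free_req_control(r);	/* second release is detected, not corrupting */
	return 0;
}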
@@ -441,7 +467,9 @@ struct qcrypto_cipher_req_ctx {
 #define SHA_MAX_DIGEST_SIZE SHA256_DIGEST_SIZE
 
 #define MSM_QCRYPTO_REQ_QUEUE_LENGTH 768
-#define COMPLETION_CB_BACKLOG_LENGTH 768
+#define COMPLETION_CB_BACKLOG_LENGTH_STOP 400
+#define COMPLETION_CB_BACKLOG_LENGTH_START \
+		(COMPLETION_CB_BACKLOG_LENGTH_STOP / 2)
 
 static uint8_t _std_init_vector_sha1_uint8[] = {
 	0x67, 0x45, 0x23, 0x01, 0xEF, 0xCD, 0xAB, 0x89,
@@ -1066,6 +1094,7 @@ static int _disp_stats(int id)
 	unsigned long flags;
 	struct crypto_priv *cp = &qcrypto_dev;
 	struct crypto_engine *pe;
+	int i;
 
 	pstat = &_qcrypto_stat;
 	len = scnprintf(_debug_read_buf, DEBUG_MAX_RW_BUF - 1,
@@ -1187,6 +1216,18 @@ static int _disp_stats(int id)
 	len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
 			" AHASH operation fail : %llu\n",
 					pstat->ahash_op_fail);
+	len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
+			" resp start, resp stop, max rsp queue reorder-cnt : %u %u %u %u\n",
+					cp->resp_start, cp->resp_stop,
+					cp->max_resp_qlen, cp->max_reorder_cnt);
+	len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
+			" max queue legnth, no avail : %u %u\n",
+					cp->max_qlen, cp->no_avail);
+	len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
+			" work queue : %u %u %u\n",
+					cp->queue_work_eng3,
+					cp->queue_work_not_eng3,
+					cp->queue_work_not_eng3_nz);
 	len += scnprintf(_debug_read_buf + len, DEBUG_MAX_RW_BUF - len - 1,
 			"\n");
 	spin_lock_irqsave(&cp->lock, flags);
@@ -1194,8 +1235,9 @@ static int _disp_stats(int id)
 		len += scnprintf(
 			_debug_read_buf + len,
 			DEBUG_MAX_RW_BUF - len - 1,
-			" Engine %4d Req : %llu\n",
+			" Engine %4d Req max %d : %llu\n",
 			pe->unit,
+			pe->max_req_used,
 			pe->total_req
 		);
 		len += scnprintf(
@@ -1208,6 +1250,14 @@ static int _disp_stats(int id)
 		qce_get_driver_stats(pe->qce);
 	}
 	spin_unlock_irqrestore(&cp->lock, flags);
+
+	for (i = 0; i < MAX_SMP_CPU+1; i++)
+		if (cp->cpu_req[i])
+			len += scnprintf(
+				_debug_read_buf + len,
+				DEBUG_MAX_RW_BUF - len - 1,
+				"CPU %d Issue Req : %d\n",
+				i, cp->cpu_req[i]);
 	return len;
 }
 
@@ -1217,13 +1267,25 @@ static void _qcrypto_remove_engine(struct crypto_engine *pengine)
 	struct qcrypto_alg *q_alg;
 	struct qcrypto_alg *n;
 	unsigned long flags;
+	struct crypto_engine *pe;
 
 	cp = pengine->pcp;
 
 	spin_lock_irqsave(&cp->lock, flags);
 	list_del(&pengine->elist);
+	if (pengine->first_engine) {
+		cp->first_engine = NULL;
+		pe = list_first_entry(&cp->engine_list, struct crypto_engine,
+							elist);
+		if (pe) {
+			pe->first_engine = true;
+			cp->first_engine = pe;
+		}
+	}
 	if (cp->next_engine == pengine)
 		cp->next_engine = NULL;
+	if (cp->scheduled_eng == pengine)
+		cp->scheduled_eng = NULL;
 	spin_unlock_irqrestore(&cp->lock, flags);
 
 	cp->total_units--;
@@ -1432,41 +1494,15 @@ static int _qcrypto_setkey_3des(struct crypto_ablkcipher *cipher, const u8 *key,
 	return 0;
 };
 
-static struct crypto_engine *eng_sel_avoid_first(struct crypto_priv *cp)
-{
-	/*
-	 * This function need not be spinlock protected when called from
-	 * the seq_response workq as it will not have any contentions when all
-	 * request processing is stopped.
-	 */
-	struct crypto_engine *p;
-	struct crypto_engine *q = NULL;
-	int max_user = QCRYPTO_BIG_NUMBER;
-	int use_cnt;
-
-	if (unlikely(list_empty(&cp->engine_list))) {
-		pr_err("%s: no valid ce to schedule\n", __func__);
-		return NULL;
-	}
-
-	p = list_first_entry(&cp->engine_list, struct crypto_engine,
-			elist);
-	list_for_each_entry_continue(p, &cp->engine_list, elist) {
-		use_cnt = atomic_read(&p->req_count);
-		if ((use_cnt < p->max_req) && (use_cnt < max_user)) {
-			q = p;
-			max_user = use_cnt;
-		}
-	}
-	return q;
-}
-
 static void seq_response(struct work_struct *work)
 {
 	struct crypto_priv *cp = container_of(work, struct crypto_priv,
							resp_work);
 	struct llist_node *list;
 	struct llist_node *rev = NULL;
+	struct crypto_engine *pengine;
+	unsigned long flags;
+	int total_unit;
 
 again:
 	list = llist_del_all(&cp->ordered_resp_list);
@@ -1485,7 +1521,6 @@ again:
 	while (rev) {
 		struct qcrypto_resp_ctx *arsp;
 		struct crypto_async_request *areq;
-		struct crypto_engine *pengine;
 
 		arsp = container_of(rev, struct qcrypto_resp_ctx, llist);
 		rev = llist_next(rev);
@@ -1495,12 +1530,20 @@ again:
 		areq->complete(areq, arsp->res);
 		local_bh_enable();
 		atomic_dec(&cp->resp_cnt);
-		if (ACCESS_ONCE(cp->ce_req_proc_sts) == STOPPED &&
-				atomic_read(&cp->resp_cnt) <=
-				(COMPLETION_CB_BACKLOG_LENGTH / 2)) {
-			pengine = eng_sel_avoid_first(cp);
+	}
+
+	if (atomic_read(&cp->resp_cnt) < COMPLETION_CB_BACKLOG_LENGTH_START &&
+			(cmpxchg(&cp->ce_req_proc_sts, STOPPED, IN_PROGRESS)
+					== STOPPED)) {
+		cp->resp_start++;
+		for (total_unit = cp->total_units; total_unit-- > 0;) {
+			spin_lock_irqsave(&cp->lock, flags);
+			pengine = _avail_eng(cp);
+			spin_unlock_irqrestore(&cp->lock, flags);
 			if (pengine)
 				_start_qcrypto_process(cp, pengine);
+			else
+				break;
 		}
 	}
 end:
@@ -1512,12 +1555,19 @@ end:
 		goto end;
 }
 
-static void _qcrypto_tfm_complete(struct crypto_priv *cp, u32 type,
-					void *tfm_ctx)
+#define SCHEUDLE_RSP_QLEN_THRESHOLD	64
+
+static void _qcrypto_tfm_complete(struct crypto_engine *pengine, u32 type,
+					void *tfm_ctx,
+					struct qcrypto_resp_ctx *cur_arsp,
+					int res)
 {
+	struct crypto_priv *cp = pengine->pcp;
 	unsigned long flags;
 	struct qcrypto_resp_ctx *arsp;
 	struct list_head *plist;
+	unsigned int resp_qlen;
+	unsigned int cnt = 0;
 
 	switch (type) {
 	case CRYPTO_ALG_TYPE_AHASH:
@@ -1531,6 +1581,8 @@ static void _qcrypto_tfm_complete(struct crypto_priv *cp, u32 type,
 	}
 
 	spin_lock_irqsave(&cp->lock, flags);
+
+	cur_arsp->res = res;
 	while (!list_empty(plist)) {
 		arsp = list_first_entry(plist,
				struct qcrypto_resp_ctx, list);
@@ -1539,16 +1591,51 @@ static void _qcrypto_tfm_complete(struct crypto_priv *cp, u32 type,
 		else {
 			list_del(&arsp->list);
 			llist_add(&arsp->llist, &cp->ordered_resp_list);
+			atomic_inc(&cp->resp_cnt);
+			cnt++;
 		}
 	}
+	resp_qlen = atomic_read(&cp->resp_cnt);
+	if (resp_qlen > cp->max_resp_qlen)
+		cp->max_resp_qlen = resp_qlen;
+	if (cnt > cp->max_reorder_cnt)
+		cp->max_reorder_cnt = cnt;
+	if ((resp_qlen >= COMPLETION_CB_BACKLOG_LENGTH_STOP) &&
+			cmpxchg(&cp->ce_req_proc_sts, IN_PROGRESS,
+					STOPPED) == IN_PROGRESS) {
+		cp->resp_stop++;
+	}
+
 	spin_unlock_irqrestore(&cp->lock, flags);
+
 retry:
 	if (!llist_empty(&cp->ordered_resp_list)) {
+		unsigned int cpu;
+
+		if (pengine->first_engine) {
+			cpu = WORK_CPU_UNBOUND;
+			cp->queue_work_eng3++;
+		} else {
+			cp->queue_work_not_eng3++;
+			cpu = cp->cpu_getting_irqs_frm_first_ce;
+			/*
+			 * If source not the first engine, and there
+			 * are outstanding requests going on first engine,
+			 * skip scheduling of work queue to anticipate
+			 * more may be coming. If the response queue
+			 * length exceeds threshold, to avoid further
+			 * delay, schedule work queue immediately.
+			 */
+			if (cp->first_engine && atomic_read(
+					&cp->first_engine->req_count)) {
+				if (resp_qlen < SCHEUDLE_RSP_QLEN_THRESHOLD)
+					return;
+				cp->queue_work_not_eng3_nz++;
+			}
+		}
 		if (cmpxchg(&cp->sched_resp_workq_status, NOT_SCHEDULED,
 					IS_SCHEDULED) == NOT_SCHEDULED)
-			queue_work_on(cp->cpu_getting_irqs_frm_first_ce,
-					cp->resp_wq, &cp->resp_work);
+			queue_work_on(cpu, cp->resp_wq, &cp->resp_work);
 		else if (cmpxchg(&cp->sched_resp_workq_status, IS_SCHEDULED,
 					SCHEDULE_AGAIN) == NOT_SCHEDULED)
 			goto retry;
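The two hunks above turn the single completion-backlog threshold into a stop/start hysteresis: issuing stops once the ordered response queue reaches COMPLETION_CB_BACKLOG_LENGTH_STOP (400) and resumes only after the completion worker drains it below COMPLETION_CB_BACKLOG_LENGTH_START (200), with cmpxchg() making each transition fire exactly once. A compact model of just those two transitions (C11 atomics; the counter updates that the driver spreads across several functions are collapsed into two helpers here):

#include <stdatomic.h>
#include <stdio.h>

#define BACKLOG_STOP	400
#define BACKLOG_START	(BACKLOG_STOP / 2)

enum proc_sts { IN_PROGRESS, STOPPED };

static _Atomic enum proc_sts sts = IN_PROGRESS;
static atomic_int resp_cnt;

/* producer side: called as completions are queued (cf. _qcrypto_tfm_complete) */
static void on_response_queued(void)
{
	enum proc_sts expected = IN_PROGRESS;

	if (atomic_fetch_add(&resp_cnt, 1) + 1 >= BACKLOG_STOP &&
	    atomic_compare_exchange_strong(&sts, &expected, STOPPED))
		puts("stop: backlog full, pause issuing requests");
}

/* consumer side: called as the workqueue drains (cf. seq_response) */
static void on_response_completed(void)
{
	enum proc_sts expected = STOPPED;

	if (atomic_fetch_sub(&resp_cnt, 1) - 1 < BACKLOG_START &&
	    atomic_compare_exchange_strong(&sts, &expected, IN_PROGRESS))
		puts("start: backlog drained, resume issuing requests");
}

int main(void)
{
	for (int i = 0; i < BACKLOG_STOP; i++)
		on_response_queued();		/* prints "stop" exactly once */
	for (int i = 0; i < BACKLOG_STOP; i++)
		on_response_completed();	/* prints "start" exactly once */
	return 0;
}

The gap between the two thresholds is the point: the old code restarted at half of the same limit it stopped at, but checked and flipped the state with plain reads, so transitions could fire repeatedly under contention.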
@@ -1559,36 +1646,34 @@
 static void req_done(struct qcrypto_req_control *pqcrypto_req_control)
 {
 	struct crypto_engine *pengine;
 	struct crypto_async_request *areq;
-	struct crypto_engine *pe;
 	struct crypto_priv *cp;
-	unsigned long flags;
 	struct qcrypto_resp_ctx *arsp;
 	u32 type = 0;
 	void *tfm_ctx = NULL;
+	unsigned int cpu;
+	int res;
 
 	pengine = pqcrypto_req_control->pce;
 	cp = pengine->pcp;
-	spin_lock_irqsave(&cp->lock, flags);
 	areq = pqcrypto_req_control->req;
 	arsp = pqcrypto_req_control->arsp;
+	res = pqcrypto_req_control->res;
 	qcrypto_free_req_control(pengine, pqcrypto_req_control);
 	if (areq) {
 		type = crypto_tfm_alg_type(areq->tfm);
 		tfm_ctx = crypto_tfm_ctx(areq->tfm);
 	}
-	pe = list_first_entry(&cp->engine_list, struct crypto_engine, elist);
-	if (pe == pengine)
-		if (cp->cpu_getting_irqs_frm_first_ce != smp_processor_id())
-			cp->cpu_getting_irqs_frm_first_ce = smp_processor_id();
-	spin_unlock_irqrestore(&cp->lock, flags);
-	if (atomic_read(&cp->resp_cnt) <= COMPLETION_CB_BACKLOG_LENGTH) {
-		cmpxchg(&cp->ce_req_proc_sts, STOPPED, IN_PROGRESS);
-		_start_qcrypto_process(cp, pengine);
-	} else
-		cmpxchg(&cp->ce_req_proc_sts, IN_PROGRESS, STOPPED);
+	cpu = smp_processor_id();
+	pengine->irq_cpu = cpu;
+	if (pengine->first_engine) {
+		if (cpu != cp->cpu_getting_irqs_frm_first_ce)
+			cp->cpu_getting_irqs_frm_first_ce = cpu;
+	}
 	if (areq)
-		_qcrypto_tfm_complete(cp, type, tfm_ctx);
+		_qcrypto_tfm_complete(pengine, type, tfm_ctx, arsp, res);
+	if (ACCESS_ONCE(cp->ce_req_proc_sts) == IN_PROGRESS)
+		_start_qcrypto_process(cp, pengine);
 }
 
@@ -1639,10 +1724,10 @@ static void _qce_ahash_complete(void *cookie, unsigned char *digest,
 	rctx->first_blk = 0;
 
 	if (ret) {
-		pqcrypto_req_control->arsp->res = -ENXIO;
+		pqcrypto_req_control->res = -ENXIO;
 		pstat->ahash_op_fail++;
 	} else {
-		pqcrypto_req_control->arsp->res = 0;
+		pqcrypto_req_control->res = 0;
 		pstat->ahash_op_success++;
 	}
 	if (cp->ce_support.aligned_only) {
@@ -1684,10 +1769,10 @@ static void _qce_ablk_cipher_complete(void *cookie, unsigned char *icb,
 		memcpy(ctx->iv, iv, crypto_ablkcipher_ivsize(ablk));
 
 	if (ret) {
-		pqcrypto_req_control->arsp->res = -ENXIO;
+		pqcrypto_req_control->res = -ENXIO;
 		pstat->ablk_cipher_op_fail++;
 	} else {
-		pqcrypto_req_control->arsp->res = 0;
+		pqcrypto_req_control->res = 0;
 		pstat->ablk_cipher_op_success++;
 	}
 
@@ -1773,7 +1858,7 @@ static void _qce_aead_complete(void *cookie, unsigned char *icv,
 	else
 		pstat->aead_op_success++;
 
-	pqcrypto_req_control->arsp->res = ret;
+	pqcrypto_req_control->res = ret;
 	req_done(pqcrypto_req_control);
 }
 
@@ -2100,12 +2185,24 @@ static int _start_qcrypto_process(struct crypto_priv *cp,
 	struct aead_request *aead_req;
 	struct qcrypto_resp_ctx *arsp;
 	struct qcrypto_req_control *pqcrypto_req_control;
+	unsigned int cpu = MAX_SMP_CPU;
+
+	if (ACCESS_ONCE(cp->ce_req_proc_sts) == STOPPED)
+		return 0;
+
+	if (in_interrupt()) {
+		cpu = smp_processor_id();
+		if (cpu >= MAX_SMP_CPU)
+			cpu = MAX_SMP_CPU - 1;
+	} else
+		cpu = MAX_SMP_CPU;
 
 	pstat = &_qcrypto_stat;
 
 again:
 	spin_lock_irqsave(&cp->lock, flags);
-	if (atomic_read(&pengine->req_count) >= (pengine->max_req)) {
+	if (pengine->issue_req ||
+		atomic_read(&pengine->req_count) >= (pengine->max_req)) {
 		spin_unlock_irqrestore(&cp->lock, flags);
 		return 0;
 	}
@@ -2176,7 +2273,6 @@ again:
 		break;
 	}
 
-	atomic_inc(&cp->resp_cnt);
 	arsp->res = -EINPROGRESS;
 	arsp->async_req = async_req;
 	pqcrypto_req_control->pce = pengine;
@@ -2185,6 +2281,10 @@ again:
 	pengine->active_seq++;
 	pengine->check_flag = true;
 
+	pengine->issue_req = true;
+	cp->cpu_req[cpu]++;
+	smp_mb(); /* make it visible */
+
 	spin_unlock_irqrestore(&cp->lock, flags);
 	if (backlog_eng)
 		backlog_eng->complete(backlog_eng, -EINPROGRESS);
@@ -2204,9 +2304,12 @@ again:
 	default:
 		ret = -EINVAL;
 	};
+
+	pengine->issue_req = false;
+	smp_mb(); /* make it visible */
+
 	pengine->total_req++;
 	if (ret) {
-		arsp->res = ret;
 		pengine->err_req++;
 		qcrypto_free_req_control(pengine, pqcrypto_req_control);
@@ -2218,32 +2321,48 @@ again:
 		else
 			pstat->aead_op_fail++;
 
-		_qcrypto_tfm_complete(cp, type, tfm_ctx);
+		_qcrypto_tfm_complete(pengine, type, tfm_ctx, arsp, ret);
 		goto again;
 	};
 	return ret;
 }
 
+static inline struct crypto_engine *_next_eng(struct crypto_priv *cp,
+		struct crypto_engine *p)
+{
+
+	if (p == NULL || list_is_last(&p->elist, &cp->engine_list))
+		p = list_first_entry(&cp->engine_list, struct crypto_engine,
+			elist);
+	else
+		p = list_entry(p->elist.next, struct crypto_engine, elist);
+	return p;
+}
 static struct crypto_engine *_avail_eng(struct crypto_priv *cp)
 {
 	/* call this function with spinlock set */
-	struct crypto_engine *p;
 	struct crypto_engine *q = NULL;
-	int max_user = QCRYPTO_BIG_NUMBER;
-	int use_cnt;
+	struct crypto_engine *p = cp->scheduled_eng;
+	struct crypto_engine *q1;
+	int eng_cnt = cp->total_units;
 
 	if (unlikely(list_empty(&cp->engine_list))) {
 		pr_err("%s: no valid ce to schedule\n", __func__);
 		return NULL;
 	}
-	list_for_each_entry(p, &cp->engine_list, elist) {
-		use_cnt = atomic_read(&p->req_count);
-		if ((use_cnt < p->max_req) && (use_cnt < max_user)) {
+	p = _next_eng(cp, p);
+	q1 = p;
+	while (eng_cnt-- > 0) {
+		if (!p->issue_req && atomic_read(&p->req_count) < p->max_req) {
 			q = p;
-			max_user = use_cnt;
+			break;
 		}
+		p = _next_eng(cp, p);
+		if (q1 == p)
+			break;
 	}
+	cp->scheduled_eng = q;
 	return q;
 }
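The _next_eng()/_avail_eng() rewrite above replaces the least-loaded scan with round-robin selection: resume from the last engine scheduled and take the first engine that is neither mid-issue nor at its request limit. A user-space sketch of that walk (an array stands in for the kernel's linked engine_list; names are simplified):

#include <stdbool.h>
#include <stdio.h>

#define NUM_ENGINES 4

struct engine {
	int unit;
	bool issue_req;		/* a request is being issued to qce */
	int req_count, max_req;
};

static struct engine engines[NUM_ENGINES];
static int scheduled_eng = -1;	/* index of last engine scheduled */

static struct engine *avail_eng(void)
{
	int start = (scheduled_eng + 1) % NUM_ENGINES;

	for (int n = 0; n < NUM_ENGINES; n++) {
		struct engine *p = &engines[(start + n) % NUM_ENGINES];

		if (!p->issue_req && p->req_count < p->max_req) {
			scheduled_eng = (int)(p - engines);
			return p;
		}
	}
	return NULL;		/* all engines busy: the driver counts no_avail */
}

int main(void)
{
	for (int i = 0; i < NUM_ENGINES; i++)
		engines[i] = (struct engine){ .unit = i, .max_req = 8 };
	engines[1].req_count = 8;	/* engine 1 is full and gets skipped */

	for (int i = 0; i < 4; i++) {
		struct engine *e = avail_eng();

		printf("scheduled engine %d\n", e ? e->unit : -1);
	}
	return 0;
}

Round-robin spreads requests across engines without reading every engine's counter on each call, and the new issue_req check keeps two contexts from driving the same engine's issue path at once.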
@@ -2261,6 +2380,8 @@ static int _qcrypto_queue_req(struct crypto_priv *cp,
 	} else {
 		ret = crypto_enqueue_request(&cp->req_queue, req);
 		pengine = _avail_eng(cp);
+		if (cp->req_queue.qlen > cp->max_qlen)
+			cp->max_qlen = cp->req_queue.qlen;
 	}
 	if (pengine) {
 		switch (pengine->bw_state) {
@@ -2286,16 +2407,12 @@ static int _qcrypto_queue_req(struct crypto_priv *cp,
 			pengine = NULL;
 			break;
 		}
+	} else {
+		cp->no_avail++;
 	}
 	spin_unlock_irqrestore(&cp->lock, flags);
-	if (pengine) {
-		if (atomic_read(&cp->resp_cnt) <=
-				COMPLETION_CB_BACKLOG_LENGTH) {
-			cmpxchg(&cp->ce_req_proc_sts, STOPPED, IN_PROGRESS);
-			_start_qcrypto_process(cp, pengine);
-		} else
-			cmpxchg(&cp->ce_req_proc_sts, IN_PROGRESS, STOPPED);
-	}
+	if (pengine && (ACCESS_ONCE(cp->ce_req_proc_sts) == IN_PROGRESS))
+		_start_qcrypto_process(cp, pengine);
 	return ret;
 }
 
@@ -4762,6 +4879,8 @@ static int _qcrypto_probe(struct platform_device *pdev)
 	pengine->active_seq = 0;
 	pengine->last_active_seq = 0;
 	pengine->check_flag = false;
+	pengine->max_req_used = 0;
+	pengine->issue_req = false;
 
 	crypto_init_queue(&pengine->req_queue, MSM_QCRYPTO_REQ_QUEUE_LENGTH);
@@ -4770,6 +4889,9 @@ static int _qcrypto_probe(struct platform_device *pdev)
 	pengine->unit = cp->total_units;
 
 	spin_lock_irqsave(&cp->lock, flags);
+	pengine->first_engine = list_empty(&cp->engine_list);
+	if (pengine->first_engine)
+		cp->first_engine = pengine;
 	list_add_tail(&pengine->elist, &cp->engine_list);
 	cp->next_engine = pengine;
 	spin_unlock_irqrestore(&cp->lock, flags);
@@ -5292,6 +5414,7 @@ static ssize_t _debug_stats_write(struct file *file, const char __user *buf,
 	unsigned long flags;
 	struct crypto_priv *cp = &qcrypto_dev;
 	struct crypto_engine *pe;
+	int i;
 
 	memset((char *)&_qcrypto_stat, 0, sizeof(struct crypto_stat));
 	spin_lock_irqsave(&cp->lock, flags);
@@ -5299,7 +5422,19 @@ static ssize_t _debug_stats_write(struct file *file, const char __user *buf,
 		pe->total_req = 0;
 		pe->err_req = 0;
 		qce_clear_driver_stats(pe->qce);
+		pe->max_req_used = 0;
 	}
+	cp->max_qlen = 0;
+	cp->resp_start = 0;
+	cp->resp_stop = 0;
+	cp->no_avail = 0;
+	cp->max_resp_qlen = 0;
+	cp->queue_work_eng3 = 0;
+	cp->queue_work_not_eng3 = 0;
+	cp->queue_work_not_eng3_nz = 0;
+	cp->max_reorder_cnt = 0;
+	for (i = 0; i < MAX_SMP_CPU + 1; i++)
+		cp->cpu_req[i] = 0;
 	spin_unlock_irqrestore(&cp->lock, flags);
 	return count;
 }
@@ -5362,6 +5497,8 @@ static int __init _qcrypto_init(void)
 	pcp->total_units = 0;
 	pcp->platform_support.bus_scale_table = NULL;
 	pcp->next_engine = NULL;
+	pcp->scheduled_eng = NULL;
+	pcp->ce_req_proc_sts = IN_PROGRESS;
 	crypto_init_queue(&pcp->req_queue, MSM_QCRYPTO_REQ_QUEUE_LENGTH);
 	return platform_driver_register(&_qualcomm_crypto);
 }
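Several hunks in this patch lean on the pre-existing three-state flag (NOT_SCHEDULED / IS_SCHEDULED / SCHEDULE_AGAIN, declared elsewhere in qcrypto.c) that guarantees the response workqueue is scheduled exactly once, with a retry when a completion races a worker that just finished. The sketch below models that state machine with C11 atomics; the worker-side transition is inferred from the retry protocol in the shown hunks, not quoted from the patch, and queue_work_on() is simulated by a printf.

#include <stdatomic.h>
#include <stdio.h>

enum workq_sts { NOT_SCHEDULED, IS_SCHEDULED, SCHEDULE_AGAIN };

static _Atomic enum workq_sts sched_sts = NOT_SCHEDULED;

/* returns the value found, like the kernel's cmpxchg() */
static enum workq_sts cmpxchg_sts(enum workq_sts old, enum workq_sts new)
{
	enum workq_sts expected = old;

	atomic_compare_exchange_strong(&sched_sts, &expected, new);
	return expected;
}

/* completion side: make sure a worker run is pending (cf. the retry: block) */
static void kick_worker(void)
{
retry:
	if (cmpxchg_sts(NOT_SCHEDULED, IS_SCHEDULED) == NOT_SCHEDULED)
		puts("queue_work_on()");	/* only the first kick schedules */
	else if (cmpxchg_sts(IS_SCHEDULED, SCHEDULE_AGAIN) == NOT_SCHEDULED)
		goto retry;	/* worker finished in between: try again */
}

/* worker side: run once more if a kick arrived while the worker was running */
static int worker_done(void)
{
	if (cmpxchg_sts(SCHEDULE_AGAIN, IS_SCHEDULED) == SCHEDULE_AGAIN)
		return 1;	/* loop again instead of exiting */
	cmpxchg_sts(IS_SCHEDULED, NOT_SCHEDULED);
	return 0;
}

int main(void)
{
	kick_worker();		/* schedules the worker */
	kick_worker();		/* marks SCHEDULE_AGAIN, no double schedule */
	while (worker_done())
		puts("worker: one more pass");
	kick_worker();		/* schedules again after the worker exits */
	return 0;
}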