drm/amdkfd: Allow user to limit only queues per device
author: Oded Gabbay <oded.gabbay@amd.com>
Sun, 18 Jan 2015 11:18:01 +0000 (13:18 +0200)
committer: Oded Gabbay <oded.gabbay@amd.com>
Sun, 18 Jan 2015 11:18:01 +0000 (13:18 +0200)
This patch replaces the two current amdkfd module parameters with a new one.

The current parameters that are being replaced are:

- Maximum number of HSA processes
- Maximum number of queues per process

The new parameter that replaces them is called "Maximum queues per device"

This replacement achieves two goals:

- Allows the user to have as many HSA processes as they want (up to
  a maximum of 512 HSA processes on Kaveri).

- Removes the limitation the user had on the maximum number of queues per HSA
  process. E.g. the user can now have processes which only have one queue and
  other processes which have hundreds of queues, while before the user
  couldn't have more than 128 queues per process (by default).

The default value of the new parameter is 4096 (32 * 128, which were the
defaults of the old parameters). There is almost no additional GART memory
required for the default case. As a reminder, this number of queues requires
slightly less than 4MB of GART memory.

v2:
In addition, this patch defines a new counter for queue accounting in the DQM
structure. This is done because the current counter only counts active queues,
which allows the user to create more queues than the
max_num_of_queues_per_device module parameter allows.

However, we need the current counter for the runlist packet build process, so
the solution is to have a dedicated counter for this accounting.

Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
Reviewed-by: Ben Goz <ben.goz@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_module.c
drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 633532a2e7ec875b968343975c2d49d69d220abc..25bc47f3c1cf53d0181d0bc63f3c381f0502f138 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -169,9 +170,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
        kfd->shared_resources = *gpu_resources;
 
        /* calculate max size of mqds needed for queues */
-       size = max_num_of_processes *
-               max_num_of_queues_per_process *
-               kfd->device_info->mqd_size_aligned;
+       size = max_num_of_queues_per_device *
+                       kfd->device_info->mqd_size_aligned;
 
        /* add another 512KB for all other allocations on gart */
        size += 512 * 1024;
index 30c8fda9622e507f0fabc0b2386d2240b812360c..b9626ae079603b7c91b4bc59632d4dd2d5c65096 100644 (file)
@@ -183,6 +183,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 
        mutex_lock(&dqm->lock);
 
+       if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+               pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+                               dqm->total_queue_count);
+               mutex_unlock(&dqm->lock);
+               return -EPERM;
+       }
+
        if (list_empty(&qpd->queues_list)) {
                retval = allocate_vmid(dqm, qpd, q);
                if (retval != 0) {
@@ -207,6 +214,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
        list_add(&q->list, &qpd->queues_list);
        dqm->queue_count++;
 
+       /*
+        * Unconditionally increment this counter, regardless of the queue's
+        * type or whether the queue is active.
+        */
+       dqm->total_queue_count++;
+       pr_debug("Total of %d queues are accountable so far\n",
+                       dqm->total_queue_count);
+
        mutex_unlock(&dqm->lock);
        return 0;
 }
@@ -326,6 +341,15 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
        if (list_empty(&qpd->queues_list))
                deallocate_vmid(dqm, qpd, q);
        dqm->queue_count--;
+
+       /*
+        * Unconditionally decrement this counter, regardless of the queue's
+        * type
+        */
+       dqm->total_queue_count--;
+       pr_debug("Total of %d queues are accountable so far\n",
+                       dqm->total_queue_count);
+
 out:
        mutex_unlock(&dqm->lock);
        return retval;
@@ -752,6 +776,21 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
        pr_debug("kfd: In func %s\n", __func__);
 
        mutex_lock(&dqm->lock);
+       if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+               pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+                               dqm->total_queue_count);
+               mutex_unlock(&dqm->lock);
+               return -EPERM;
+       }
+
+       /*
+        * Unconditionally increment this counter, regardless of the queue's
+        * type or whether the queue is active.
+        */
+       dqm->total_queue_count++;
+       pr_debug("Total of %d queues are accountable so far\n",
+                       dqm->total_queue_count);
+
        list_add(&kq->list, &qpd->priv_queue_list);
        dqm->queue_count++;
        qpd->is_debug = true;
@@ -775,6 +814,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
        dqm->queue_count--;
        qpd->is_debug = false;
        execute_queues_cpsch(dqm, false);
+       /*
+        * Unconditionally decrement this counter, regardless of the queue's
+        * type.
+        */
+       dqm->total_queue_count++;
+       pr_debug("Total of %d queues are accountable so far\n",
+                       dqm->total_queue_count);
        mutex_unlock(&dqm->lock);
 }
 
@@ -793,6 +839,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
        mutex_lock(&dqm->lock);
 
+       if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+               pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+                               dqm->total_queue_count);
+               retval = -EPERM;
+               goto out;
+       }
+
        mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
        if (mqd == NULL) {
                mutex_unlock(&dqm->lock);
@@ -810,6 +863,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                retval = execute_queues_cpsch(dqm, false);
        }
 
+       /*
+        * Unconditionally increment this counter, regardless of the queue's
+        * type or whether the queue is active.
+        */
+       dqm->total_queue_count++;
+
+       pr_debug("Total of %d queues are accountable so far\n",
+                       dqm->total_queue_count);
+
 out:
        mutex_unlock(&dqm->lock);
        return retval;
@@ -930,6 +992,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
        mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
+       /*
+        * Unconditionally decrement this counter, regardless of the queue's
+        * type
+        */
+       dqm->total_queue_count--;
+       pr_debug("Total of %d queues are accountable so far\n",
+                       dqm->total_queue_count);
+
        mutex_unlock(&dqm->lock);
 
        return 0;
index c3f189e8ae35da5527efebe3ae016b47b2b3342c..52035bf0c1cb3ce896901573491a071c17c8bcea 100644 (file)
@@ -130,6 +130,7 @@ struct device_queue_manager {
        struct list_head        queues;
        unsigned int            processes_count;
        unsigned int            queue_count;
+       unsigned int            total_queue_count;
        unsigned int            next_pipe_to_allocate;
        unsigned int            *allocated_queues;
        unsigned int            vmid_bitmap;
index 95d5af138e6e7f2bcbd8d76351f7e12827031189..a8be6df8534753fbaed2103a57a3f054838bc29a 100644 (file)
@@ -50,15 +50,10 @@ module_param(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
        "Kernel cmdline parameter that defines the amdkfd scheduling policy");
 
-int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
-module_param(max_num_of_processes, int, 0444);
-MODULE_PARM_DESC(max_num_of_processes,
-       "Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");
-
-int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
-module_param(max_num_of_queues_per_process, int, 0444);
-MODULE_PARM_DESC(max_num_of_queues_per_process,
-       "Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+       "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
 
 bool kgd2kfd_init(unsigned interface_version,
                  const struct kfd2kgd_calls *f2g,
@@ -100,16 +95,10 @@ static int __init kfd_module_init(void)
        }
 
        /* Verify module parameters */
-       if ((max_num_of_processes < 0) ||
-               (max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
-               pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
-               return -1;
-       }
-
-       if ((max_num_of_queues_per_process < 0) ||
-               (max_num_of_queues_per_process >
-                       KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
-               pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
+       if ((max_num_of_queues_per_device < 1) ||
+               (max_num_of_queues_per_device >
+                       KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
+               pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
                return -1;
        }
 
index 4c25ef504f79dd6be067369a1c216b11db98fc2e..6cfe7f1f18cff0d805a75097a5a9f86e2a4fcdfd 100644 (file)
@@ -30,7 +30,7 @@ static DEFINE_MUTEX(pasid_mutex);
 
 int kfd_pasid_init(void)
 {
-       pasid_limit = max_num_of_processes;
+       pasid_limit = KFD_MAX_NUM_OF_PROCESSES;
 
        pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
        if (!pasid_bitmap)
index b3dc13c83169c1a3fb5cd54c9ae179aaf9508d36..96dc10e8904afc3cd2e5a8069ca8fb0a1eda2e6b 100644 (file)
 #define kfd_alloc_struct(ptr_to_struct)        \
        ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
 
-/* Kernel module parameter to specify maximum number of supported processes */
-extern int max_num_of_processes;
-
-#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32
 #define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
 
 /*
- * Kernel module parameter to specify maximum number of supported queues
- * per process
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
  */
-extern int max_num_of_queues_per_process;
+extern int max_num_of_queues_per_device;
 
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128
-#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE               \
+       (KFD_MAX_NUM_OF_PROCESSES *                     \
+                       KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 
 #define KFD_KERNEL_QUEUE_SIZE 2048
 
index 326d26881afd5cb4ca0ef6e38dbc11a88e4cb96a..f37cf5efe642ca23b42fc45de8ca0c69617d9937 100644 (file)
@@ -54,11 +54,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
        pr_debug("kfd: in %s\n", __func__);
 
        found = find_first_zero_bit(pqm->queue_slot_bitmap,
-                       max_num_of_queues_per_process);
+                       KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 
        pr_debug("kfd: the new slot id %lu\n", found);
 
-       if (found >= max_num_of_queues_per_process) {
+       if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
                pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
                                pqm->process->pasid);
                return -ENOMEM;
@@ -76,7 +76,7 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 
        INIT_LIST_HEAD(&pqm->queues);
        pqm->queue_slot_bitmap =
-                       kzalloc(DIV_ROUND_UP(max_num_of_queues_per_process,
+                       kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
                                        BITS_PER_BYTE), GFP_KERNEL);
        if (pqm->queue_slot_bitmap == NULL)
                return -ENOMEM;
@@ -203,6 +203,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
                pqn->kq = NULL;
                retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd,
                                                &q->properties.vmid);
+               pr_debug("DQM returned %d for create_queue\n", retval);
                print_queue(q);
                break;
        case KFD_QUEUE_TYPE_DIQ:
@@ -222,7 +223,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
        }
 
        if (retval != 0) {
-               pr_err("kfd: error dqm create queue\n");
+               pr_debug("Error dqm create queue\n");
                goto err_create_queue;
        }