Back to index

libdrm  2.4.37
radeon_cs_gem.c
Go to the documentation of this file.
00001 /*
00002  * Copyright © 2008 Jérôme Glisse
00003  * All Rights Reserved.
00004  *
00005  * Permission is hereby granted, free of charge, to any person obtaining
00006  * a copy of this software and associated documentation files (the
00007  * "Software"), to deal in the Software without restriction, including
00008  * without limitation the rights to use, copy, modify, merge, publish,
00009  * distribute, sub license, and/or sell copies of the Software, and to
00010  * permit persons to whom the Software is furnished to do so, subject to
00011  * the following conditions:
00012  *
00013  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00014  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
00015  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00016  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
00017  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00018  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
00019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
00020  * USE OR OTHER DEALINGS IN THE SOFTWARE.
00021  *
00022  * The above copyright notice and this permission notice (including the
00023  * next paragraph) shall be included in all copies or substantial portions
00024  * of the Software.
00025  */
00026 /*
00027  * Authors:
00028  *      Aapo Tahkola <aet@rasterburn.org>
00029  *      Nicolai Haehnle <prefect_@gmx.net>
00030  *      Jérôme Glisse <glisse@freedesktop.org>
00031  */
00032 #include <assert.h>
00033 #include <errno.h>
00034 #include <stdlib.h>
00035 #include <string.h>
00036 #include <pthread.h>
00037 #include <sys/mman.h>
00038 #include <sys/ioctl.h>
00039 #include "radeon_cs.h"
00040 #include "radeon_cs_int.h"
00041 #include "radeon_bo_int.h"
00042 #include "radeon_cs_gem.h"
00043 #include "radeon_bo_gem.h"
00044 #include "drm.h"
00045 #include "xf86drm.h"
00046 #include "xf86atomic.h"
00047 #include "radeon_drm.h"
00048 #include "bof.h"
00049 
/* Set to a non-zero value to make cs_gem_emit() dump every command stream
 * through cs_gem_dump_bof() before it is submitted. */
#define CS_BOF_DUMP 0
00051 
00052 struct radeon_cs_manager_gem {
00053     struct radeon_cs_manager    base;
00054     uint32_t                    device_id;
00055     unsigned                    nbof;
00056 };
00057 
/* One entry of the relocation chunk: four packed 32-bit words. */
#pragma pack(1)
struct cs_reloc_gem {
    uint32_t handle;        /* handle of the referenced buffer object */
    uint32_t read_domain;   /* domain(s) the buffer is read from */
    uint32_t write_domain;  /* domain the buffer is written in */
    uint32_t flags;
};
#pragma pack()

/* Size of one relocation entry, expressed in 32-bit words. */
#define RELOC_SIZE (sizeof(struct cs_reloc_gem) / sizeof(uint32_t))
00068 
00069 struct cs_gem {
00070     struct radeon_cs_int        base;
00071     struct drm_radeon_cs        cs;
00072     struct drm_radeon_cs_chunk  chunks[2];
00073     unsigned                    nrelocs;
00074     uint32_t                    *relocs;
00075     struct radeon_bo_int        **relocs_bo;
00076 };
00077 
/* Serialises every access to cs_id_source. */
static pthread_mutex_t id_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Bitmask of the command stream ids currently handed out (set bit = in use).
 * Static storage starts out zeroed, i.e. with every id free. */
static uint32_t cs_id_source;
00080 
/**
 * Return a mask in which only the lowest clear bit of n is set.
 *
 * @param n bitmask to inspect
 * @return single-bit mask of the lowest zero bit of n, or 0 when every bit
 *         of n is already set.
 */
static uint32_t get_first_zero(const uint32_t n)
{
    /* n + 1 flips the trailing run of ones to zeros and sets the first zero
     * bit; masking with ~n keeps only that bit. Done entirely in unsigned
     * arithmetic, so there is no signed shift into bit 31 and an all-ones
     * input simply yields 0. */
    return (uint32_t)(~n & (n + 1u));
}
00089 
00094 static uint32_t generate_id(void)
00095 {
00096     uint32_t r = 0;
00097     pthread_mutex_lock( &id_mutex );
00098     /* check for free ids */
00099     if (cs_id_source != ~r) {
00100         /* find first zero bit */
00101         r = get_first_zero(cs_id_source);
00102 
00103         /* set id as reserved */
00104         cs_id_source |= r;
00105     }
00106     pthread_mutex_unlock( &id_mutex );
00107     return r;
00108 }
00109 
00113 static void free_id(uint32_t id)
00114 {
00115     pthread_mutex_lock( &id_mutex );
00116 
00117     cs_id_source &= ~id;
00118 
00119     pthread_mutex_unlock( &id_mutex );
00120 }
00121 
00122 static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
00123                                        uint32_t ndw)
00124 {
00125     struct cs_gem *csg;
00126 
00127     /* max cmd buffer size is 64Kb */
00128     if (ndw > (64 * 1024 / 4)) {
00129         return NULL;
00130     }
00131     csg = (struct cs_gem*)calloc(1, sizeof(struct cs_gem));
00132     if (csg == NULL) {
00133         return NULL;
00134     }
00135     csg->base.csm = csm;
00136     csg->base.ndw = 64 * 1024 / 4;
00137     csg->base.packets = (uint32_t*)calloc(1, 64 * 1024);
00138     if (csg->base.packets == NULL) {
00139         free(csg);
00140         return NULL;
00141     }
00142     csg->base.relocs_total_size = 0;
00143     csg->base.crelocs = 0;
00144     csg->base.id = generate_id();
00145     csg->nrelocs = 4096 / (4 * 4) ;
00146     csg->relocs_bo = (struct radeon_bo_int**)calloc(1,
00147                                                 csg->nrelocs*sizeof(void*));
00148     if (csg->relocs_bo == NULL) {
00149         free(csg->base.packets);
00150         free(csg);
00151         return NULL;
00152     }
00153     csg->base.relocs = csg->relocs = (uint32_t*)calloc(1, 4096);
00154     if (csg->relocs == NULL) {
00155         free(csg->relocs_bo);
00156         free(csg->base.packets);
00157         free(csg);
00158         return NULL;
00159     }
00160     csg->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
00161     csg->chunks[0].length_dw = 0;
00162     csg->chunks[0].chunk_data = (uint64_t)(uintptr_t)csg->base.packets;
00163     csg->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
00164     csg->chunks[1].length_dw = 0;
00165     csg->chunks[1].chunk_data = (uint64_t)(uintptr_t)csg->relocs;
00166     return (struct radeon_cs_int*)csg;
00167 }
00168 
00169 static int cs_gem_write_reloc(struct radeon_cs_int *cs,
00170                               struct radeon_bo *bo,
00171                               uint32_t read_domain,
00172                               uint32_t write_domain,
00173                               uint32_t flags)
00174 {
00175     struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
00176     struct cs_gem *csg = (struct cs_gem*)cs;
00177     struct cs_reloc_gem *reloc;
00178     uint32_t idx;
00179     unsigned i;
00180 
00181     assert(boi->space_accounted);
00182 
00183     /* check domains */
00184     if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
00185         /* in one CS a bo can only be in read or write domain but not
00186          * in read & write domain at the same sime
00187          */
00188         return -EINVAL;
00189     }
00190     if (read_domain == RADEON_GEM_DOMAIN_CPU) {
00191         return -EINVAL;
00192     }
00193     if (write_domain == RADEON_GEM_DOMAIN_CPU) {
00194         return -EINVAL;
00195     }
00196     /* use bit field hash function to determine
00197        if this bo is for sure not in this cs.*/
00198     if ((atomic_read((atomic_t *)radeon_gem_get_reloc_in_cs(bo)) & cs->id)) {
00199         /* check if bo is already referenced.
00200          * Scanning from end to begin reduces cycles with mesa because
00201          * it often relocates same shared dma bo again. */
00202         for(i = cs->crelocs; i != 0;) {
00203             --i;
00204             idx = i * RELOC_SIZE;
00205             reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
00206             if (reloc->handle == bo->handle) {
00207                 /* Check domains must be in read or write. As we check already
00208                  * checked that in argument one of the read or write domain was
00209                  * set we only need to check that if previous reloc as the read
00210                  * domain set then the read_domain should also be set for this
00211                  * new relocation.
00212                  */
00213                 /* the DDX expects to read and write from same pixmap */
00214                 if (write_domain && (reloc->read_domain & write_domain)) {
00215                     reloc->read_domain = 0;
00216                     reloc->write_domain = write_domain;
00217                 } else if (read_domain & reloc->write_domain) {
00218                     reloc->read_domain = 0;
00219                 } else {
00220                     if (write_domain != reloc->write_domain)
00221                         return -EINVAL;
00222                     if (read_domain != reloc->read_domain)
00223                         return -EINVAL;
00224                 }
00225 
00226                 reloc->read_domain |= read_domain;
00227                 reloc->write_domain |= write_domain;
00228                 /* update flags */
00229                 reloc->flags |= (flags & reloc->flags);
00230                 /* write relocation packet */
00231                 radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
00232                 radeon_cs_write_dword((struct radeon_cs *)cs, idx);
00233                 return 0;
00234             }
00235         }
00236     }
00237     /* new relocation */
00238     if (csg->base.crelocs >= csg->nrelocs) {
00239         /* allocate more memory (TODO: should use a slab allocatore maybe) */
00240         uint32_t *tmp, size;
00241         size = ((csg->nrelocs + 1) * sizeof(struct radeon_bo*));
00242         tmp = (uint32_t*)realloc(csg->relocs_bo, size);
00243         if (tmp == NULL) {
00244             return -ENOMEM;
00245         }
00246         csg->relocs_bo = (struct radeon_bo_int **)tmp;
00247         size = ((csg->nrelocs + 1) * RELOC_SIZE * 4);
00248         tmp = (uint32_t*)realloc(csg->relocs, size);
00249         if (tmp == NULL) {
00250             return -ENOMEM;
00251         }
00252         cs->relocs = csg->relocs = tmp;
00253         csg->nrelocs += 1;
00254         csg->chunks[1].chunk_data = (uint64_t)(uintptr_t)csg->relocs;
00255     }
00256     csg->relocs_bo[csg->base.crelocs] = boi;
00257     idx = (csg->base.crelocs++) * RELOC_SIZE;
00258     reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
00259     reloc->handle = bo->handle;
00260     reloc->read_domain = read_domain;
00261     reloc->write_domain = write_domain;
00262     reloc->flags = flags;
00263     csg->chunks[1].length_dw += RELOC_SIZE;
00264     radeon_bo_ref(bo);
00265     /* bo might be referenced from another context so have to use atomic opertions */
00266     atomic_add((atomic_t *)radeon_gem_get_reloc_in_cs(bo), cs->id);
00267     cs->relocs_total_size += boi->size;
00268     radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
00269     radeon_cs_write_dword((struct radeon_cs *)cs, idx);
00270     return 0;
00271 }
00272 
00273 static int cs_gem_begin(struct radeon_cs_int *cs,
00274                         uint32_t ndw,
00275                         const char *file,
00276                         const char *func,
00277                         int line)
00278 {
00279 
00280     if (cs->section_ndw) {
00281         fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
00282                 cs->section_file, cs->section_func, cs->section_line);
00283         fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
00284                 file, func, line);
00285         return -EPIPE;
00286     }
00287     cs->section_ndw = ndw;
00288     cs->section_cdw = 0;
00289     cs->section_file = file;
00290     cs->section_func = func;
00291     cs->section_line = line;
00292 
00293     if (cs->cdw + ndw > cs->ndw) {
00294         uint32_t tmp, *ptr;
00295 
00296         /* round up the required size to a multiple of 1024 */
00297         tmp = (cs->cdw + ndw + 0x3FF) & (~0x3FF);
00298         ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
00299         if (ptr == NULL) {
00300             return -ENOMEM;
00301         }
00302         cs->packets = ptr;
00303         cs->ndw = tmp;
00304     }
00305     return 0;
00306 }
00307 
00308 static int cs_gem_end(struct radeon_cs_int *cs,
00309                       const char *file,
00310                       const char *func,
00311                       int line)
00312 
00313 {
00314     if (!cs->section_ndw) {
00315         fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
00316                 file, func, line);
00317         return -EPIPE;
00318     }
00319     if (cs->section_ndw != cs->section_cdw) {
00320         fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
00321                 cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
00322         fprintf(stderr, "CS section end at (%s,%s,%d)\n",
00323                 file, func, line);
00324 
00325         /* We must reset the section even when there is error. */
00326         cs->section_ndw = 0;
00327         return -EPIPE;
00328     }
00329     cs->section_ndw = 0;
00330     return 0;
00331 }
00332 
00333 static void cs_gem_dump_bof(struct radeon_cs_int *cs)
00334 {
00335     struct cs_gem *csg = (struct cs_gem*)cs;
00336     struct radeon_cs_manager_gem *csm;
00337     bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
00338     char tmp[256];
00339     unsigned i;
00340 
00341     csm = (struct radeon_cs_manager_gem *)cs->csm;
00342     root = device_id = bcs = blob = array = bo = size = handle = NULL;
00343     root = bof_object();
00344     if (root == NULL)
00345         goto out_err;
00346     device_id = bof_int32(csm->device_id);
00347     if (device_id == NULL)
00348         return;
00349     if (bof_object_set(root, "device_id", device_id))
00350         goto out_err;
00351     bof_decref(device_id);
00352     device_id = NULL;
00353     /* dump relocs */
00354     blob = bof_blob(csg->nrelocs * 16, csg->relocs);
00355     if (blob == NULL)
00356         goto out_err;
00357     if (bof_object_set(root, "reloc", blob))
00358         goto out_err;
00359     bof_decref(blob);
00360     blob = NULL;
00361     /* dump cs */
00362     blob = bof_blob(cs->cdw * 4, cs->packets);
00363     if (blob == NULL)
00364         goto out_err;
00365     if (bof_object_set(root, "pm4", blob))
00366         goto out_err;
00367     bof_decref(blob);
00368     blob = NULL;
00369     /* dump bo */
00370     array = bof_array();
00371     if (array == NULL)
00372         goto out_err;
00373     for (i = 0; i < csg->base.crelocs; i++) {
00374         bo = bof_object();
00375         if (bo == NULL)
00376             goto out_err;
00377         size = bof_int32(csg->relocs_bo[i]->size);
00378         if (size == NULL)
00379             goto out_err;
00380         if (bof_object_set(bo, "size", size))
00381             goto out_err;
00382         bof_decref(size);
00383         size = NULL;
00384         handle = bof_int32(csg->relocs_bo[i]->handle);
00385         if (handle == NULL)
00386             goto out_err;
00387         if (bof_object_set(bo, "handle", handle))
00388             goto out_err;
00389         bof_decref(handle);
00390         handle = NULL;
00391         radeon_bo_map((struct radeon_bo*)csg->relocs_bo[i], 0);
00392         blob = bof_blob(csg->relocs_bo[i]->size, csg->relocs_bo[i]->ptr);
00393         radeon_bo_unmap((struct radeon_bo*)csg->relocs_bo[i]);
00394         if (blob == NULL)
00395             goto out_err;
00396         if (bof_object_set(bo, "data", blob))
00397             goto out_err;
00398         bof_decref(blob);
00399         blob = NULL;
00400         if (bof_array_append(array, bo))
00401             goto out_err;
00402         bof_decref(bo);
00403         bo = NULL;
00404     }
00405     if (bof_object_set(root, "bo", array))
00406         goto out_err;
00407     sprintf(tmp, "d-0x%04X-%08d.bof", csm->device_id, csm->nbof++);
00408     bof_dump_file(root, tmp);
00409 out_err:
00410     bof_decref(blob);
00411     bof_decref(array);
00412     bof_decref(bo);
00413     bof_decref(size);
00414     bof_decref(handle);
00415     bof_decref(device_id);
00416     bof_decref(root);
00417 }
00418 
00419 static int cs_gem_emit(struct radeon_cs_int *cs)
00420 {
00421     struct cs_gem *csg = (struct cs_gem*)cs;
00422     uint64_t chunk_array[2];
00423     unsigned i;
00424     int r;
00425 
00426 #if CS_BOF_DUMP
00427     cs_gem_dump_bof(cs);
00428 #endif
00429     csg->chunks[0].length_dw = cs->cdw;
00430 
00431     chunk_array[0] = (uint64_t)(uintptr_t)&csg->chunks[0];
00432     chunk_array[1] = (uint64_t)(uintptr_t)&csg->chunks[1];
00433 
00434     csg->cs.num_chunks = 2;
00435     csg->cs.chunks = (uint64_t)(uintptr_t)chunk_array;
00436 
00437     r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS,
00438                             &csg->cs, sizeof(struct drm_radeon_cs));
00439     for (i = 0; i < csg->base.crelocs; i++) {
00440         csg->relocs_bo[i]->space_accounted = 0;
00441         /* bo might be referenced from another context so have to use atomic opertions */
00442         atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id);
00443         radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
00444         csg->relocs_bo[i] = NULL;
00445     }
00446 
00447     cs->csm->read_used = 0;
00448     cs->csm->vram_write_used = 0;
00449     cs->csm->gart_write_used = 0;
00450     return r;
00451 }
00452 
00453 static int cs_gem_destroy(struct radeon_cs_int *cs)
00454 {
00455     struct cs_gem *csg = (struct cs_gem*)cs;
00456 
00457     free_id(cs->id);
00458     free(csg->relocs_bo);
00459     free(cs->relocs);
00460     free(cs->packets);
00461     free(cs);
00462     return 0;
00463 }
00464 
00465 static int cs_gem_erase(struct radeon_cs_int *cs)
00466 {
00467     struct cs_gem *csg = (struct cs_gem*)cs;
00468     unsigned i;
00469 
00470     if (csg->relocs_bo) {
00471         for (i = 0; i < csg->base.crelocs; i++) {
00472             if (csg->relocs_bo[i]) {
00473                 /* bo might be referenced from another context so have to use atomic opertions */
00474                 atomic_dec((atomic_t *)radeon_gem_get_reloc_in_cs((struct radeon_bo*)csg->relocs_bo[i]), cs->id);
00475                 radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
00476                 csg->relocs_bo[i] = NULL;
00477             }
00478         }
00479     }
00480     cs->relocs_total_size = 0;
00481     cs->cdw = 0;
00482     cs->section_ndw = 0;
00483     cs->crelocs = 0;
00484     csg->chunks[0].length_dw = 0;
00485     csg->chunks[1].length_dw = 0;
00486     return 0;
00487 }
00488 
/**
 * Tell whether the command stream should be flushed.
 *
 * The check on relocs_total_size (more than 32MB) is disabled, so a flush is
 * never requested.
 *
 * @param cs command stream (unused)
 * @return always 0.
 */
static int cs_gem_need_flush(struct radeon_cs_int *cs)
{
    (void)cs;
    return 0;
}
00493 
00494 static void cs_gem_print(struct radeon_cs_int *cs, FILE *file)
00495 {
00496     struct radeon_cs_manager_gem *csm;
00497     unsigned int i;
00498 
00499     csm = (struct radeon_cs_manager_gem *)cs->csm;
00500     fprintf(file, "VENDORID:DEVICEID 0x%04X:0x%04X\n", 0x1002, csm->device_id);
00501     for (i = 0; i < cs->cdw; i++) {
00502         fprintf(file, "0x%08X\n", cs->packets[i]);
00503     }
00504 }
00505 
00506 static struct radeon_cs_funcs radeon_cs_gem_funcs = {
00507     cs_gem_create,
00508     cs_gem_write_reloc,
00509     cs_gem_begin,
00510     cs_gem_end,
00511     cs_gem_emit,
00512     cs_gem_destroy,
00513     cs_gem_erase,
00514     cs_gem_need_flush,
00515     cs_gem_print,
00516 };
00517 
00518 static int radeon_get_device_id(int fd, uint32_t *device_id)
00519 {
00520     struct drm_radeon_info info = {};
00521     int r;
00522 
00523     *device_id = 0;
00524     info.request = RADEON_INFO_DEVICE_ID;
00525     info.value = (uintptr_t)device_id;
00526     r = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info,
00527                             sizeof(struct drm_radeon_info));
00528     return r;
00529 }
00530 
00531 struct radeon_cs_manager *radeon_cs_manager_gem_ctor(int fd)
00532 {
00533     struct radeon_cs_manager_gem *csm;
00534 
00535     csm = calloc(1, sizeof(struct radeon_cs_manager_gem));
00536     if (csm == NULL) {
00537         return NULL;
00538     }
00539     csm->base.funcs = &radeon_cs_gem_funcs;
00540     csm->base.fd = fd;
00541     radeon_get_device_id(fd, &csm->device_id);
00542     return &csm->base;
00543 }
00544 
/**
 * Free a manager created by radeon_cs_manager_gem_ctor().
 *
 * @param csm manager to free; NULL is accepted
 */
void radeon_cs_manager_gem_dtor(struct radeon_cs_manager *csm)
{
    /* The constructor returns the address of the first member of the
     * allocation, so freeing csm frees the whole manager. */
    free(csm);
}