On coherent systems (not-AGP) the IB should be in cached memory so should
be just as fast, so we can avoid copying to temporary pages and just use it
directly.
provides minor speedups on rv530: gears ~1820->1860, ipers: 29.9->30.6,
but always good to use less CPU if we can.
v3: cleanup unneeded bits.
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
p->chunks[p->chunk_ib_idx].length_dw);
return -EINVAL;
}
p->chunks[p->chunk_ib_idx].length_dw);
return -EINVAL;
}
- p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
- p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
- p->chunks[p->chunk_ib_idx].kpage[1] == NULL)
- return -ENOMEM;
+ if ((p->rdev->flags & RADEON_IS_AGP)) {
+ p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
+ p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
+ kfree(p->chunks[i].kpage[0]);
+ kfree(p->chunks[i].kpage[1]);
+ return -ENOMEM;
+ }
+ }
p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
p->chunks[p->chunk_ib_idx].last_copied_page = -1;
p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
p->chunks[p->chunk_ib_idx].last_copied_page = -1;
kfree(parser->relocs_ptr);
for (i = 0; i < parser->nchunks; i++) {
kfree(parser->chunks[i].kdata);
kfree(parser->relocs_ptr);
for (i = 0; i < parser->nchunks; i++) {
kfree(parser->chunks[i].kdata);
- kfree(parser->chunks[i].kpage[0]);
- kfree(parser->chunks[i].kpage[1]);
+ if ((parser->rdev->flags & RADEON_IS_AGP)) {
+ kfree(parser->chunks[i].kpage[0]);
+ kfree(parser->chunks[i].kpage[1]);
+ }
}
kfree(parser->chunks);
kfree(parser->chunks_array);
}
kfree(parser->chunks);
kfree(parser->chunks_array);
struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
int i;
int size = PAGE_SIZE;
struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
int i;
int size = PAGE_SIZE;
+ bool copy1 = (p->rdev->flags & RADEON_IS_AGP) ? false : true;
for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
- new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
-
if (pg_idx == ibc->last_page_index) {
size = (ibc->length_dw * 4) % PAGE_SIZE;
if (pg_idx == ibc->last_page_index) {
size = (ibc->length_dw * 4) % PAGE_SIZE;
- if (size == 0)
- size = PAGE_SIZE;
+ if (size == 0)
+ size = PAGE_SIZE;
+ new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
+ if (copy1)
+ ibc->kpage[new_page] = p->ib->ptr + (pg_idx * (PAGE_SIZE / 4));
+
if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
ibc->user_ptr + (pg_idx * PAGE_SIZE),
size)) {
if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
ibc->user_ptr + (pg_idx * PAGE_SIZE),
size)) {
- /* copy to IB here */
- memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
+ /* copy to IB for non single case */
+ if (!copy1)
+ memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
ibc->last_copied_page = pg_idx;
ibc->kpage_idx[new_page] = pg_idx;
ibc->last_copied_page = pg_idx;
ibc->kpage_idx[new_page] = pg_idx;