diff --git a/.pick_status.json b/.pick_status.json index 1d6c48692e1..3a623d99f68 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -256,7 +256,7 @@ "description": "intel/perf: fix performance counters availability after glFinish", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/gallium/drivers/iris/iris_monitor.c b/src/gallium/drivers/iris/iris_monitor.c index b615476b2cf..919aebca478 100644 --- a/src/gallium/drivers/iris/iris_monitor.c +++ b/src/gallium/drivers/iris/iris_monitor.c @@ -283,7 +283,7 @@ iris_get_monitor_result(struct pipe_context *ctx, assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch)); unsigned bytes_written; - gen_perf_get_query_data(perf_ctx, monitor->query, + gen_perf_get_query_data(perf_ctx, monitor->query, batch, monitor->result_size, (unsigned*) monitor->result_buffer, &bytes_written); diff --git a/src/gallium/drivers/iris/iris_performance_query.c b/src/gallium/drivers/iris/iris_performance_query.c index 1e8e1d9201a..0ef5c206fe3 100644 --- a/src/gallium/drivers/iris/iris_performance_query.c +++ b/src/gallium/drivers/iris/iris_performance_query.c @@ -214,7 +214,8 @@ iris_get_perf_query_data(struct pipe_context *pipe, struct gen_perf_query_object *obj = perf_query->query; struct gen_perf_context *perf_ctx = ice->perf_ctx; - gen_perf_get_query_data(perf_ctx, obj, data_size, data, bytes_written); + gen_perf_get_query_data(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER], + data_size, data, bytes_written); } void diff --git a/src/intel/perf/gen_perf_query.c b/src/intel/perf/gen_perf_query.c index 57b01203a6f..b9744913b16 100644 --- a/src/intel/perf/gen_perf_query.c +++ b/src/intel/perf/gen_perf_query.c @@ -1061,17 +1061,6 @@ gen_perf_wait_query(struct gen_perf_context *perf_ctx, perf_cfg->vtbl.batchbuffer_flush(perf_ctx->ctx, __FILE__, __LINE__); perf_cfg->vtbl.bo_wait_rendering(bo); - - /* Due to a race condition between the OA unit 
signaling report - * availability and the report actually being written into memory, - * we need to wait for all the reports to come in before we can - * read them. - */ - if (query->queryinfo->kind == GEN_PERF_QUERY_TYPE_OA || - query->queryinfo->kind == GEN_PERF_QUERY_TYPE_RAW) { - while (!read_oa_samples_for_query(perf_ctx, query, current_batch)) - ; - } } bool @@ -1087,8 +1076,8 @@ gen_perf_is_query_ready(struct gen_perf_context *perf_ctx, return (query->oa.results_accumulated || (query->oa.bo && !perf_cfg->vtbl.batch_references(current_batch, query->oa.bo) && - !perf_cfg->vtbl.bo_busy(query->oa.bo) && - read_oa_samples_for_query(perf_ctx, query, current_batch))); + !perf_cfg->vtbl.bo_busy(query->oa.bo))); + case GEN_PERF_QUERY_TYPE_PIPELINE: return (query->pipeline_stats.bo && !perf_cfg->vtbl.batch_references(current_batch, query->pipeline_stats.bo) && @@ -1513,6 +1502,7 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx, void gen_perf_get_query_data(struct gen_perf_context *perf_ctx, struct gen_perf_query_object *query, + void *current_batch, int data_size, unsigned *data, unsigned *bytes_written) @@ -1524,6 +1514,17 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx, case GEN_PERF_QUERY_TYPE_OA: case GEN_PERF_QUERY_TYPE_RAW: if (!query->oa.results_accumulated) { + /* Due to the sampling frequency of the OA buffer by the i915-perf + * driver, there can be a 5ms delay between Mesa seeing the query + * complete and i915 making all the OA buffer reports available to us. + * We need to wait for all the reports to come in before we can do + * the post processing removing unrelated deltas. + * There is an i915-perf series to address this issue, but it's + * not been merged upstream yet. 
+ */ + while (!read_oa_samples_for_query(perf_ctx, query, current_batch)) + ; + read_gt_frequency(perf_ctx, query); uint32_t *begin_report = query->oa.map; uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES; diff --git a/src/intel/perf/gen_perf_query.h b/src/intel/perf/gen_perf_query.h index a0246501f76..d064a5d0669 100644 --- a/src/intel/perf/gen_perf_query.h +++ b/src/intel/perf/gen_perf_query.h @@ -76,6 +76,7 @@ void gen_perf_delete_query(struct gen_perf_context *perf_ctx, struct gen_perf_query_object *query); void gen_perf_get_query_data(struct gen_perf_context *perf_ctx, struct gen_perf_query_object *query, + void *current_batch, int data_size, unsigned *data, unsigned *bytes_written); diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 6c74403da6c..042c236d976 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -323,7 +323,7 @@ brw_get_perf_query_data(struct gl_context *ctx, */ assert(o->Ready); - gen_perf_get_query_data(brw->perf_ctx, obj, + gen_perf_get_query_data(brw->perf_ctx, obj, &brw->batch, data_size, data, bytes_written); }