|
View:
New views
5 Messages
—
Rating Filter:
Alert me
|
|
|
[PATCH] pixman: C fast path for add_1000_1000 and over_n_1_8888Hello,
These two fast path functions dealing with 1-bit data are needed to improve performance of xfce4 terminal. Some other applications may potentially benefit too. The patches are also available here: http://cgit.freedesktop.org/~siamashka/pixman/log/?h=1bit-for-master -- Best regards, Siarhei Siamashka From 07d3c5924e6a1196ce1025461084b6400110cc8f Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@...> Date: Fri, 23 Oct 2009 20:56:30 +0300 Subject: [PATCH] blitters-test updated to also randomly generate mask_x/mask_y --- test/blitters-test.c | 10 ++++++++-- 1 files changed, 8 insertions(+), 2 deletions(-) diff --git a/test/blitters-test.c b/test/blitters-test.c index b8b6eba..bba6b1e 100644 --- a/test/blitters-test.c +++ b/test/blitters-test.c @@ -473,6 +473,7 @@ test_composite (uint32_t initcrc, int testnum, int verbose) int src_stride, dst_stride; int src_x, src_y; int dst_x, dst_y; + int mask_x, mask_y; int w, h; int op; pixman_format_code_t src_fmt, dst_fmt, mask_fmt; @@ -516,6 +517,8 @@ test_composite (uint32_t initcrc, int testnum, int verbose) mask_img = NULL; mask_fmt = -1; + mask_x = 0; + mask_y = 0; if (lcg_rand_n (2)) { @@ -534,6 +537,9 @@ test_composite (uint32_t initcrc, int testnum, int verbose) if (lcg_rand_n (2)) pixman_image_set_component_alpha (mask_img, 1); + + mask_x = lcg_rand_n (pixman_image_get_width (mask_img)); + mask_y = lcg_rand_n (pixman_image_get_height (mask_img)); } src_width = pixman_image_get_width (src_img); @@ -568,7 +574,7 @@ test_composite (uint32_t initcrc, int testnum, int verbose) } pixman_image_composite (op, src_img, mask_img, dst_img, - src_x, src_y, src_x, src_y, dst_x, dst_y, w, h); + src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); if (verbose) { @@ -641,7 +647,7 @@ main (int argc, char *argv[]) /* Predefined value for running with all the fastpath functions disabled. It needs to be updated every time when changes are introduced to this program or behavior of pixman changes! 
*/ - if (crc == 0x481369DE) + if (crc == 0x1911E2C3) { printf ("blitters test passed\n"); } -- 1.5.4.3 From 5f2a77a0339ff70e1384c866b4e404dfb00784eb Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@...> Date: Mon, 26 Oct 2009 01:56:55 +0200 Subject: [PATCH] C fast path for add_1000_1000 and over_n_1_8888 These two fast path functions dealing with 1-bit data are needed to improve performance of xfce4 terminal. Some other applications may potentially benefit too. --- pixman/pixman-fast-path.c | 126 +++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 126 insertions(+), 0 deletions(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index c053229..871a9f8 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -1025,6 +1025,129 @@ fast_composite_add_n_8_8 (pixman_implementation_t *imp, } } +#ifdef WORDS_BIGENDIAN + +#define CREATE_BITMASK(n) (0x80000000 >> (n)) +#define UPDATE_BITMASK(n) ((n) >> 1) + +#else + +#define CREATE_BITMASK(n) (1 << (n)) +#define UPDATE_BITMASK(n) ((n) << 1) + +#endif + +#define TEST_BIT(p, n) \ + (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) +#define SET_BIT(p, n) \ + do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); + +static void +fast_composite_add_1000_1000 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, + src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t, + dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + /* + * 
TODO: improve performance by processing uint32_t data instead + * of individual bits + */ + if (TEST_BIT (src, src_x + w)) + SET_BIT (dst, dest_x + w); + } + } +} + +static void +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + uint32_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = over (src, *dst); + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } +} + /* * Simple bitblt */ @@ -1107,6 +1230,8 @@ static const pixman_fast_path_t c_fast_paths[] = { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fast_composite_over_n_8_8888, 0 }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fast_composite_over_n_8_8888, 0 }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fast_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a1, PIXMAN_a8r8g8b8, fast_composite_over_n_1_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a1, PIXMAN_x8r8g8b8, fast_composite_over_n_1_8888, 0 }, { PIXMAN_OP_OVER, PIXMAN_solid, 
PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fast_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, @@ -1126,6 +1251,7 @@ static const pixman_fast_path_t c_fast_paths[] = { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_add_8888_8888, 0 }, { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_add_8888_8888, 0 }, { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fast_composite_add_8000_8000, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a1, PIXMAN_null, PIXMAN_a1, fast_composite_add_1000_1000, 0 }, { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fast_composite_add_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fast_composite_add_n_8_8, 0 }, { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_solid_fill, 0 }, -- 1.5.4.3 _______________________________________________ cairo mailing list cairo@... http://lists.cairographics.org/mailman/listinfo/cairo |
|
|
Re: [PATCH] pixman: C fast path for add_1000_1000 and over_n_1_8888Hi,
> These two fast path functions dealing with 1-bit data are needed > to improve performance of xfce4 terminal. Some other applications > may potentially benefit too. A couple of minor comments: Can we get the two fast paths enabled in two separate commits to facilitate bisecting? > +static void > +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, > + pixman_op_t op, > + > + [...] > + if (bitcache & bitmask) > + *dst = over (src, *dst); It would likely be a big win to check whether the alpha channel of the source is 0xFF (which it almost always is), and then simply write out the source if it is. That would completely avoid reading the destination from memory. > + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a1, PIXMAN_a8r8g8b8, fast_composite_over_n_1_8888, 0 }, > + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a1, PIXMAN_x8r8g8b8, fast_composite_over_n_1_8888, 0 }, I think this one can be enabled for destinations of type x8b8g8r8 and a8b8g8r8 as well, since pixman_image_get_solid() takes care of the swapping. Other than those things, it looks good to me. Thanks, Soren _______________________________________________ cairo mailing list cairo@... http://lists.cairographics.org/mailman/listinfo/cairo |
|
|
Re: [PATCH] pixman: C fast path for add_1000_1000 and over_n_1_8888On Monday 26 October 2009, Soeren Sandmann wrote:
> Hi, > > > These two fast path functions dealing with 1-bit data are needed > > to improve performance of xfce4 terminal. Some other applications > > may potentially benefit too. > > A couple of minor comments: > > Can we get the two fast paths enabled in two separate commits to > facilitate bisecting? > > > +static void > > +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, > > + pixman_op_t op, > > + > > + [...] > > + if (bitcache & bitmask) > > + *dst = over (src, *dst); > > It would likely be a big win to check whether the alpha channel of the > source is 0xFF (which is almost always is), then simply writing out > the source if it is. That would completely avoid reading the > destination from memory. > > + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a1, PIXMAN_a8r8g8b8, > > fast_composite_over_n_1_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, > > PIXMAN_a1, PIXMAN_x8r8g8b8, fast_composite_over_n_1_8888, 0 }, > > I think this one can be enabled for destinations of type x8b8g8r8 and > a8b8g8r8 as well, since pixman_image_get_solid() takes care of the > swapping. > > Other than those things, it looks good to me. Thanks for the review. Patches corrected according to your comments, tested on both big and little endian systems and pushed to master. -- Best regards, Siarhei Siamashka _______________________________________________ cairo mailing list cairo@... http://lists.cairographics.org/mailman/listinfo/cairo |
|
|
Re: [PATCH] pixman: C fast path for add_1000_1000 and over_n_1_8888Hi Siarhei,
I was just worrying about the absence of such paths from the current set of cairo-traces. The only attempt I've made at capturing a wide range of fonts and languages are the gnome-terminal and original firefox traces. I suspect that these and my fontsets do not accurately reflect your usage at all (and so my profiling is woefully myopic). xfce4-terminal with a recent vte will use cairo for its rendering so should generate a good trace, as will firefox and other gtk+ applications. Could you record some sample cairo-traces so that we can see how much impact the addition of pixman fast paths makes to your workflow, and so that we do not neglect you when developing the other backends as well? Thanks, -ickle -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ cairo mailing list cairo@... http://lists.cairographics.org/mailman/listinfo/cairo |
|
|
Re: [PATCH] pixman: C fast path for add_1000_1000 and over_n_1_8888On Sunday 08 November 2009, Chris Wilson wrote:
> Hi Siarhei, > I was just worrying about the absence of such paths from the > current set of cairo-traces. The only attempt I've made at capturing a > wide range of fonts and languages are the gnome-terminal and original > firefox traces. I suspect that these and my fontsets do not accurately > reflect your usage at all (and so my profiling is woefully myopic). > xfce4-terminal with a recent vte will use cairo for its rendering so > should generate a good trace, as will firefox and other gtk+ > applications. Could you record some sample cairo-traces so that we can > see how much impact the addition of pixman fast paths makes to your > workflow, and so that we do not neglect you when developing the other > backends as well? Performance was quite bad when using bitmap fonts such as terminus: http://www.is-vn.bg/hamster It's probably not a very important case for most users, though I myself prefer to use bitmap fonts in terminals. But it just shows exceptionally bad performance here unless pixman has the needed fast path functions. Here is the trace (scrolling 'man gcc' in xfce4-terminal with terminus font, 16bpp desktop, ARM cpu): http://people.freedesktop.org/~siamashka/files/20091109/Terminal.30676.lzma Actually, after upgrading cairo and some of the other libraries, I now get somewhat different behavior from what I saw before. 
This is a log from oprofile for Xorg process with current pixman git: samples % image name symbol name 13296 29.1528 libpixman-1.so.0.17.1 combine_over_u 6452 14.1466 libpixman-1.so.0.17.1 fetch_scanline_r5g6b5 5516 12.0944 libpixman-1.so.0.17.1 fetch_scanline_a1 2273 4.9838 libpixman-1.so.0.17.1 store_scanline_r5g6b5 1741 3.8173 libpixman-1.so.0.17.1 fast_composite_add_1000_1000 1718 3.7669 libc-2.9.so memcpy 1176 2.5785 libpixman-1.so.0.17.1 arm_neon_fill 1114 2.4426 vmlinux __memzero 951 2.0852 libpixman-1.so.0.17.1 bits_image_fetch_solid_32 640 1.4033 libpixman-1.so.0.17.1 _pixman_run_fast_path 513 1.1248 libc-2.9.so _int_malloc 447 0.9801 libpixman-1.so.0.17.1 _pixman_bits_image_setup_raw_accessors 377 0.8266 libc-2.9.so malloc 350 0.7674 libfb.so image_from_pict 321 0.7038 libc-2.9.so _int_free 307 0.6731 vmlinux __do_softirq 293 0.6424 Xorg miGlyphs 270 0.5920 Xorg CompositePicture 210 0.4604 libc-2.9.so free 204 0.4473 libfb.so fbComposite It clearly shows that an 'over_n_1_0565' fast path is now also badly needed for this use case. Earlier, only 'over_n_1_8888' was called and the result was then converted to 0565 as an additional step (which was bad in itself, but was a separate problem that seems to be solved now). Still, the 'over_n_1_8888' fast path is also useful for 32bpp desktops, such as the PS3, which I'm using for testing big-endian compatibility. I'll post some more benchmarks for this 1-bit stuff later. -- Best regards, Siarhei Siamashka _______________________________________________ cairo mailing list cairo@... http://lists.cairographics.org/mailman/listinfo/cairo |
| Free embeddable forum powered by Nabble | Forum Help |