VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 11.12.2007 - 04:10 |  |
Hey guys, finally some results for you. Here's the new optimised shader, which gets ~80 fps on my laptop (that's twice as fast as the version this thread started out with!). I can't make any guarantees that you'll see the same speedup; for instance, my desktop's GF6600 hates some of the optimisations my laptop loves - but since the laptop uses a newer card, I'm optimising for that. Anyway, give it a try and let me know what you think.
vertex file:
| // Vertex stage: forwards texture coordinate set 0 to the fragment stage and
| // emits the vertex position produced by ftransform().
| void main()
| {
|   gl_TexCoord[0] = gl_MultiTexCoord0;
|   gl_Position = ftransform();
| }
| | |
fragment file:
| /*
| 2xSaI GLSL shader
| - Copyright (C) 2007 guest(r) - guest.r@gmail.com, Ver - ver.greeneyes@gmail.com
| - License: GNU-GPL
|
| The 2xSaI algorithm
| - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
| */
|
| const vec4 dt = vec4(16777216.,65536.,256.,1.);
| uniform vec4 OGL2Size,OGL2InvSize;
| uniform sampler2D OGL2Texture;
|
| vec4 TWOxSaI();
|
| // Fragment entry point: the output colour is whatever TWOxSaI() returns.
| void main()
| {
|   vec4 filtered = TWOxSaI();
|   gl_FragColor = filtered;
| }
|
| // Collapses a colour into a single float: the dot product of the colour with
| // the constant weight vector dt. TWOxSaI() compares these scalars with ==/!=
| // instead of comparing whole vec4 values.
| float reduce(vec4 colour)
| {
|   float key = dot(colour, dt);
|   return key;
| }
|
| // Computes the 2xSaI-filtered colour for the current fragment and returns it.
| //
| // Reads gl_TexCoord[0], the uniforms OGL2Size / OGL2InvSize and the sampler
| // OGL2Texture. NOTE(review): presumably OGL2Size.xy is the source texture size
| // in texels and OGL2InvSize.xy its reciprocal - both are set by the host
| // application, confirm there.
| //
| // The fractional position fp inside the source texel selects one of four
| // quadrants, each with its own rule:
| //   fp.x< .5, fp.y< .5 : C4 unchanged
| //   x (see below)      : C4, C7 or their average
| //   y (see below)      : C4, C5 or the average of C4,C5,C7,C8
| vec4 TWOxSaI()
| { vec4 rValue;
| // OGL2Pos: texture coordinate scaled by OGL2Size; fp: its fractional part;
| // pC4: the floored position scaled back, used as the centre sample position.
| vec2 OGL2Pos = gl_TexCoord[0].xy*OGL2Size.xy,
| fp = fract(OGL2Pos), dx, dy,
| pC4 = floor(OGL2Pos)/OGL2Size.xy;
| // dx steps along x and dy along y, except in the quadrant fp.x>=.5 && fp.y<.5
| // where the two are swapped. The swap transposes the sampled neighbourhood so
| // that the single 'x' rule below serves both quadrants it covers.
| if(fp.x<.5 || fp.y>=.5)
| { dx = vec2(OGL2InvSize.x,0.);
| dy = vec2(0.,OGL2InvSize.y);
| }
| else
| { dx = vec2(0.,OGL2InvSize.y);
| dy = vec2(OGL2InvSize.x,0.);
| }
| // C0..C8: 3x3 neighbourhood around C4 (rows along dy, columns along dx).
| // D0,D1,D2: the row two dy-steps away; D4,D5: the column two dx-steps away.
| vec4 C0 = texture2D(OGL2Texture,pC4- dx- dy),
| C1 = texture2D(OGL2Texture,pC4 - dy),
| C2 = texture2D(OGL2Texture,pC4+ dx- dy),
| C3 = texture2D(OGL2Texture,pC4- dx ),
| C4 = texture2D(OGL2Texture,pC4 ),
| C5 = texture2D(OGL2Texture,pC4+ dx ),
| C6 = texture2D(OGL2Texture,pC4- dx+ dy),
| C7 = texture2D(OGL2Texture,pC4 + dy),
| C8 = texture2D(OGL2Texture,pC4+ dx+ dy),
| D0 = texture2D(OGL2Texture,pC4- dx+2.*dy),
| D1 = texture2D(OGL2Texture,pC4 +2.*dy),
| D2 = texture2D(OGL2Texture,pC4+ dx+2.*dy),
| D4 = texture2D(OGL2Texture,pC4+2.*dx ),
| D5 = texture2D(OGL2Texture,pC4+2.*dx+ dy);
| // Scalar key for every sample; all comparisons below use these keys.
| float c0 = reduce(C0),c1 = reduce(C1),c2 = reduce(C2),c3 = reduce(C3),
| c4 = reduce(C4),c5 = reduce(C5),c6 = reduce(C6),c7 = reduce(C7),
| c8 = reduce(C8),d0 = reduce(D0),d1 = reduce(D1),d2 = reduce(D2),
| d4 = reduce(D4),d5 = reduce(D5);
| // r: four neighbour-equality patterns each add 1, four others each subtract 1
| // (float(bool) yields 1. or 0.). Only its sign / zero-ness is consulted, and
| // only in the 'y' rules when c4==c8 && c5==c7 && c4!=c5.
| float r = float(c5==d4 && c2==c5) + float(c5==c6 && c5==d1) + float(c3!=c4 && c1!=c4 && c3==c5 && c1==c5) + float(c4!=d5 && c4!=d2 && c5==d5 && c5==d2)
| -(float(c3==c4 && c1==c4) + float(c4==d5 && c4==d2) + float(c5!=d4 && c2!=c5 && c4==d4 && c2==c4) + float(c5!=c6 && c5!=d1 && c4==c6 && c4==d1));
| // Pattern flags (&& binds tighter than ||):
| //   a - when set together with c5!=c7, suppresses the C4/C7 blend (C4 is kept);
| //   b - when set together with c4!=c8, selects C7 outright.
| bool a = c4==c5 && c4==c6 && c3!=c7 && c7==d0 || c4==c8 && c3==c4 && c7==d2;
| bool b = c3==c7 && c7==c8 && c4!=c6 && c0==c4 || c5==c7 && c6==c7 && c2==c4;
| // Quadrant selectors: x is true when exactly one of fp.x, fp.y is >= .5,
| // y when both are. They are never both true.
| bool x = fp.x<.5 && fp.y>=.5 || fp.x>=.5 && fp.y<.5;
| bool y = fp.x>=.5 && fp.y>=.5;
|
| // Default result; the rules below overwrite it when their condition holds.
| rValue = C4;
| if(x && c4!=c8 && b) rValue = C7;
| if(!(!x || c4!=c8 && b || c5!=c7 && a || c4==c8 && c5==c7 && c4==c5)) rValue = .5*(C4+C7);
| if(y && c5==c7 && (c4!=c8 || c4!=c5 && r< 0.)) rValue = C5;
| if(y && (c4!=c8 && c5!=c7 || c4==c8 && c5==c7 && c4!=c5 && r==0.)) rValue = .25*(C4+C5+C7+C8);
|
| return rValue;
| }
| | |
[Dieser Beitrag wurde am 11.12.2007 - 04:59 von VerGreeneyes aktualisiert]
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 14.12.2007 - 01:41 |  |
Hah, converting it back to the original structure I get more-or-less the same version as what you have on the front page. I did change it to make no unnecessary writes (C4) though, and I think it looks quite clean, so have a look ^_^
fragment file:
| /*
| 2xSaI GLSL shader
| - Copyright (C) 2007 guest(r) - guest.r@gmail.com, Ver - ver.greeneyes@gmail.com
| - License: GNU-GPL
|
| The 2xSaI algorithm
| - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
| */
|
| const vec4 dt = vec4(16777216.,65536.,256.,1.);
| uniform vec4 OGL2Size,OGL2InvSize;
| uniform sampler2D OGL2Texture;
|
| void TWOxSaI(out vec4 colour);
|
| // Fragment entry point: TWOxSaI() produces the filtered colour through its
| // out parameter, and that colour becomes the fragment output.
| void main()
| {
|   vec4 filtered;
|   TWOxSaI(filtered);
|   gl_FragColor = filtered;
| }
|
| // Scores the pair (C,D) against the two candidates A and B:
| //   +1. when C and D both equal B and both differ from A,
| //   -1. when C and D both equal A,
| //    0. otherwise.
| float GET_RESULT(float A, float B, float C, float D)
| {
|   bool bothAreA = (A == C) && (A == D);
|   bool bothAreB = (B == C) && (B == D);
|   bool noneIsA = (A != C) && (A != D);
|   float score = 0.;
|   if (noneIsA && bothAreB) score += 1.;
|   if (bothAreA) score -= 1.;
|   return score;
| }
|
| // Collapses a colour into a single float: the dot product of the colour with
| // the constant weight vector dt. TWOxSaI() compares these scalars with ==/!=
| // instead of comparing whole vec4 values.
| float reduce(vec4 colour)
| {
|   float key = dot(colour, dt);
|   return key;
| }
|
| // Computes the 2xSaI-filtered colour for the current fragment and writes it
| // to the out parameter 'colour' (always assigned: it starts as C4).
| //
| // Reads gl_TexCoord[0], the uniforms OGL2Size / OGL2InvSize and the sampler
| // OGL2Texture. NOTE(review): presumably OGL2Size.xy is the source texture size
| // in texels and OGL2InvSize.xy its reciprocal - both are set by the host
| // application, confirm there.
| //
| // Structure: an outer split on c4==c8, an inner split on c5==c7, and inside
| // each leaf the quadrant flags x / y pick the rule. When neither x nor y is
| // set (fp.x<.5 && fp.y<.5) the result stays C4.
| void TWOxSaI(out vec4 colour)
| { vec2 OGL2Pos = gl_TexCoord[0].xy*OGL2Size.xy,
| fp = fract(OGL2Pos), dx, dy,
| pC4 = floor(OGL2Pos)/OGL2Size.xy;
| // dx steps along x and dy along y, except in the quadrant fp.x>=.5 && fp.y<.5
| // where the two are swapped. The swap transposes the sampled neighbourhood so
| // that the single 'x' rule below serves both quadrants it covers.
| if(fp.x<.5 || fp.y>=.5)
| { dx = vec2(OGL2InvSize.x,0.);
| dy = vec2(0.,OGL2InvSize.y);
| }
| else
| { dx = vec2(0.,OGL2InvSize.y);
| dy = vec2(OGL2InvSize.x,0.);
| }
| // C0..C8: 3x3 neighbourhood around C4 (rows along dy, columns along dx).
| // D0,D1,D2: the row two dy-steps away; D4,D5: the column two dx-steps away.
| vec4 C0 = texture2D(OGL2Texture,pC4- dx- dy),
| C1 = texture2D(OGL2Texture,pC4 - dy),
| C2 = texture2D(OGL2Texture,pC4+ dx- dy),
| C3 = texture2D(OGL2Texture,pC4- dx ),
| C4 = texture2D(OGL2Texture,pC4 ),
| C5 = texture2D(OGL2Texture,pC4+ dx ),
| C6 = texture2D(OGL2Texture,pC4- dx+ dy),
| C7 = texture2D(OGL2Texture,pC4 + dy),
| C8 = texture2D(OGL2Texture,pC4+ dx+ dy),
| D0 = texture2D(OGL2Texture,pC4- dx+2.*dy),
| D1 = texture2D(OGL2Texture,pC4 +2.*dy),
| D2 = texture2D(OGL2Texture,pC4+ dx+2.*dy),
| D4 = texture2D(OGL2Texture,pC4+2.*dx ),
| D5 = texture2D(OGL2Texture,pC4+2.*dx+ dy);
| // Scalar key for every sample; all comparisons below use these keys.
| // r is declared here but only assigned and read inside the 'y' leaf below.
| float c0 = reduce(C0),c1 = reduce(C1),c2 = reduce(C2),c3 = reduce(C3),
| c4 = reduce(C4),c5 = reduce(C5),c6 = reduce(C6),c7 = reduce(C7),
| c8 = reduce(C8),d0 = reduce(D0),d1 = reduce(D1),d2 = reduce(D2),
| d4 = reduce(D4),d5 = reduce(D5),r;
| // Quadrant selectors: x is true when exactly one of fp.x, fp.y is >= .5,
| // y when both are. They are never both true.
| bool x = fp.x<.5 && fp.y>=.5 || fp.x>=.5 && fp.y<.5;
| bool y = fp.x>=.5 && fp.y>=.5;
|
| // Default result; overwritten below when a rule applies.
| colour = C4;
| if(c4==c8)
| { if(c5==c7)
| // Both diagonals uniform. If they also share one colour (c4==c5) nothing
| // is changed and C4 is kept.
| { if(c4!=c5)
| { if(x) colour = .5*(C4+C7);
| if(y)
| // r sums two GET_RESULT scores and subtracts two others, all over the
| // outer neighbours: r<0 picks C5, r==0 averages the four centre
| // samples, r>0 leaves C4.
| { r = GET_RESULT(c4,c5,c3,c1) + GET_RESULT(c4,c5,d5,d2)
| - GET_RESULT(c5,c4,d4,c2) - GET_RESULT(c5,c4,c6,d1);
| if(r< 0.) colour = C5;
| if(r==0.) colour = .25*(C4+C5+C7+C8);
| }
| }
| // c4==c8 but c5!=c7: in the x quadrants blend C4/C7 unless one of the two
| // neighbour patterns inside !(...) holds (&& binds tighter than ||); y keeps C4.
| }else if(x && !(c3==c4 && c7==d2 || c4==c5 && c4==c6 && c3!=c7 && c7==d0)) colour = .5*(C4+C7);
| }else
| { if(c5==c7)
| // c4!=c8, c5==c7: y takes C5; x takes C7 when either pattern in the
| // ternary condition holds, otherwise the C4/C7 average.
| { if(y) colour = C5;
| if(x) colour = (c2==c4 && c6==c7 || c3==c7 && c7==c8 && c4!=c6 && c0==c4) ? C7 : .5*(C4+C7);
| }else
| // Neither diagonal uniform: y averages the four centre samples; x takes
| // C7 on the first pattern, keeps C4 on the second, else blends C4/C7.
| { if(y) colour = .25*(C4+C5+C7+C8);
| if(x)
| { if(c3==c7 && c7==c8 && c4!=c6 && c0==c4) colour = C7;
| else if(!(c4==c5 && c4==c6 && c3!=c7 && c7==d0)) colour = .5*(C4+C7);
| }
| }
| }
| }
| | |
Edit: out of interest, could you compare the speed of the above to the speed of the following?
fragment file:
| /*
| 2xSaI GLSL shader
| - Copyright (C) 2007 guest(r) - guest.r@gmail.com, Ver - ver.greeneyes@gmail.com
| - License: GNU-GPL
|
| The 2xSaI algorithm
| - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
| */
|
| const vec4 dt = vec4(16777216.,65536.,256.,1.);
| uniform vec4 OGL2Size,OGL2InvSize;
| uniform sampler2D OGL2Texture;
|
| void TWOxSaI(out vec4 colour);
|
| // Fragment entry point: TWOxSaI() produces the filtered colour through its
| // out parameter, and that colour becomes the fragment output.
| void main()
| {
|   vec4 filtered;
|   TWOxSaI(filtered);
|   gl_FragColor = filtered;
| }
|
| // Scores the pair (C,D) against the two candidates A and B:
| //   +1. when C and D both equal B and both differ from A,
| //   -1. when C and D both equal A,
| //    0. otherwise.
| float GET_RESULT(float A, float B, float C, float D)
| {
|   bool bothAreA = (A == C) && (A == D);
|   bool bothAreB = (B == C) && (B == D);
|   bool noneIsA = (A != C) && (A != D);
|   float score = 0.;
|   if (noneIsA && bothAreB) score += 1.;
|   if (bothAreA) score -= 1.;
|   return score;
| }
|
| // Collapses a colour into a single float: the dot product of the colour with
| // the constant weight vector dt. TWOxSaI() compares these scalars with ==/!=
| // instead of comparing whole vec4 values.
| float reduce(vec4 colour)
| {
|   float key = dot(colour, dt);
|   return key;
| }
|
| // Computes the 2xSaI-filtered colour for the current fragment and writes it
| // to the out parameter 'colour' (always assigned: it starts as C4).
| //
| // Reads gl_TexCoord[0], the uniforms OGL2Size / OGL2InvSize and the sampler
| // OGL2Texture. NOTE(review): presumably OGL2Size.xy is the source texture size
| // in texels and OGL2InvSize.xy its reciprocal - both are set by the host
| // application, confirm there.
| //
| // Structure: four flat, mutually exclusive ifs keyed on (c4==c8, c5==c7)
| // instead of nested if/else. The remaining combination (c4==c8, c5==c7 and
| // c4==c5) matches none of them and keeps C4, as does the quadrant where
| // neither x nor y is set (fp.x<.5 && fp.y<.5).
| void TWOxSaI(out vec4 colour)
| { vec2 OGL2Pos = gl_TexCoord[0].xy*OGL2Size.xy,
| fp = fract(OGL2Pos), dx, dy,
| pC4 = floor(OGL2Pos)/OGL2Size.xy;
| // dx steps along x and dy along y, except in the quadrant fp.x>=.5 && fp.y<.5
| // where the two are swapped. The swap transposes the sampled neighbourhood so
| // that the single 'x' rule below serves both quadrants it covers.
| if(fp.x<.5 || fp.y>=.5)
| { dx = vec2(OGL2InvSize.x,0.);
| dy = vec2(0.,OGL2InvSize.y);
| }
| else
| { dx = vec2(0.,OGL2InvSize.y);
| dy = vec2(OGL2InvSize.x,0.);
| }
| // C0..C8: 3x3 neighbourhood around C4 (rows along dy, columns along dx).
| // D0,D1,D2: the row two dy-steps away; D4,D5: the column two dx-steps away.
| vec4 C0 = texture2D(OGL2Texture,pC4- dx- dy),
| C1 = texture2D(OGL2Texture,pC4 - dy),
| C2 = texture2D(OGL2Texture,pC4+ dx- dy),
| C3 = texture2D(OGL2Texture,pC4- dx ),
| C4 = texture2D(OGL2Texture,pC4 ),
| C5 = texture2D(OGL2Texture,pC4+ dx ),
| C6 = texture2D(OGL2Texture,pC4- dx+ dy),
| C7 = texture2D(OGL2Texture,pC4 + dy),
| C8 = texture2D(OGL2Texture,pC4+ dx+ dy),
| D0 = texture2D(OGL2Texture,pC4- dx+2.*dy),
| D1 = texture2D(OGL2Texture,pC4 +2.*dy),
| D2 = texture2D(OGL2Texture,pC4+ dx+2.*dy),
| D4 = texture2D(OGL2Texture,pC4+2.*dx ),
| D5 = texture2D(OGL2Texture,pC4+2.*dx+ dy);
| // Scalar key for every sample; all comparisons below use these keys.
| // r is declared here but only assigned and read inside the first rule's y case.
| float c0 = reduce(C0),c1 = reduce(C1),c2 = reduce(C2),c3 = reduce(C3),
| c4 = reduce(C4),c5 = reduce(C5),c6 = reduce(C6),c7 = reduce(C7),
| c8 = reduce(C8),d0 = reduce(D0),d1 = reduce(D1),d2 = reduce(D2),
| d4 = reduce(D4),d5 = reduce(D5),r;
| // Quadrant selectors: x is true when exactly one of fp.x, fp.y is >= .5,
| // y when both are. They are never both true.
| bool x = fp.x<.5 && fp.y>=.5 || fp.x>=.5 && fp.y<.5;
| bool y = fp.x>=.5 && fp.y>=.5;
|
| // Default result; overwritten below when a rule applies.
| colour = C4;
| // Rule 1 - both diagonals uniform but of different colours.
| if(c4==c8 && c5==c7 && c4!=c5)
| { if(x) colour = .5*(C4+C7);
| if(y)
| // r sums two GET_RESULT scores and subtracts two others, all over the
| // outer neighbours: r<0 picks C5, r==0 averages the four centre samples,
| // r>0 leaves C4.
| { r = GET_RESULT(c4,c5,c3,c1) + GET_RESULT(c4,c5,d5,d2)
| - GET_RESULT(c5,c4,d4,c2) - GET_RESULT(c5,c4,c6,d1);
| if(r< 0.) colour = C5;
| if(r==0.) colour = .25*(C4+C5+C7+C8);
| }
| }
| // Rule 2 - only the C4/C8 diagonal uniform: x blends C4/C7 unless one of the
| // two neighbour patterns inside !(...) holds (&& binds tighter than ||).
| if(c4==c8 && c5!=c7 && x && !(c3==c4 && c7==d2 || c4==c5 && c4==c6 && c3!=c7 && c7==d0)) colour = .5*(C4+C7);
| // Rule 3 - only the C5/C7 diagonal uniform: y takes C5; x takes C7 when
| // either pattern in the ternary condition holds, else the C4/C7 average.
| if(c4!=c8 && c5==c7)
| { if(y) colour = C5;
| if(x) colour = (c2==c4 && c6==c7 || c3==c7 && c7==c8 && c4!=c6 && c0==c4) ? C7 : .5*(C4+C7);
| }
| // Rule 4 - neither diagonal uniform: y averages the four centre samples;
| // x takes C7 on the first pattern, keeps C4 on the second, else blends C4/C7.
| if(c4!=c8 && c5!=c7)
| { if(y) colour = .25*(C4+C5+C7+C8);
| if(x)
| { if(c3==c7 && c7==c8 && c4!=c6 && c0==c4) colour = C7;
| else if(!(c4==c5 && c4==c6 && c3!=c7 && c7==d0)) colour = .5*(C4+C7);
| }
| }
| }
| | |
[Dieser Beitrag wurde am 14.12.2007 - 01:49 von VerGreeneyes aktualisiert]
|