| Author | Topics » Book an abo for this thread |  |
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 15.05.2007 - 23:01 |  |
Cool, I'll give it a shot a bit later. Not much to report on the WA2xSaI front, but in looking to make the code more sane so I could work on it more easily I managed to speed up the 2xSaI filter a bit. Also the WA2xSaI filter, but you still won't be able to use it 
Fragment file 2xSaI: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | 26: | | 27: | | 28: | | 29: | | 30: | | 31: | | 32: | | 33: | | 34: | | 35: | | 36: | | 37: | | 38: | | 39: | | 40: | | 41: | | 42: | | 43: | | 44: | | 45: | | 46: | | 47: | | 48: | | 49: | | 50: | | 51: | | 52: | | 53: | | 54: | | 55: | | 56: | | 57: | | 58: | | 59: | | 60: | | 61: | | 62: | | 63: | | 64: | | 65: | | 66: | | 67: | | 68: | | 69: | | 70: | | 71: | | 72: | | 73: | | 74: | | 75: | | 76: | | | | const vec4 dt = vec4(16777216.,65536.,256.,1.);
| uniform vec4 OGL2Size, OGL2InvSize;
| uniform sampler2D OGL2Texture;
|
| // Collapses an RGBA colour into one float key (dot product with the file-level weights `dt`)
| // so that two colours can be compared with scalar == / !=.
| float reduce(vec4 colour){
|   float key = dot(colour,dt);
|   return key;
| }
|
| // Scores candidate A against candidate B using two neighbour keys C and D:
| // +1. when C and D both equal B and neither equals A, -1. when C and D both equal A, 0. otherwise.
| float GET_RESULT(float A, float B, float C, float D){
|   bool bothMatchA = (A == C) && (A == D);
|   bool bothMatchB = (A != C) && (A != D) && (B == C) && (B == D);
|   return float(bothMatchB) - float(bothMatchA);
| }
|
| vec4 xSaI(){ // 2xSaI filter: chooses this fragment's colour from a 14-texel neighbourhood of OGL2Texture
| vec4 rValue;
| vec2 OGL2Pos = gl_TexCoord[0].xy*OGL2Size.xy, // position scaled by OGL2Size (presumably the texture size in texels -- TODO confirm with host app)
| fp = fract(OGL2Pos), dx, dy, // fp: fractional position inside the current texel; dx/dy are assigned just below
| pC4 = floor(OGL2Pos)/OGL2Size.xy; // coordinate of the current texel with the fractional part removed
| if(fp.x < .5 || fp.y >= .5){ // normal axis orientation for every case except fp.x >= .5 with fp.y < .5
| dx = vec2(OGL2InvSize.x,0.);
| dy = vec2(0.,OGL2InvSize.y);
| }else{ // fp.x >= .5 and fp.y < .5: swap the axes so the rules below can be reused unchanged
| dx = vec2(0.,OGL2InvSize.y);
| dy = vec2(OGL2InvSize.x,0.);
| } vec4 C0 = texture2D(OGL2Texture,pC4- dx- dy), // C0..C8: the 3x3 block centred on C4 (the current texel)
| C1 = texture2D(OGL2Texture,pC4 - dy),
| C2 = texture2D(OGL2Texture,pC4+ dx- dy),
| C3 = texture2D(OGL2Texture,pC4- dx ),
| C4 = texture2D(OGL2Texture,pC4 ),
| C5 = texture2D(OGL2Texture,pC4+ dx ),
| C6 = texture2D(OGL2Texture,pC4- dx+ dy),
| C7 = texture2D(OGL2Texture,pC4 + dy),
| C8 = texture2D(OGL2Texture,pC4+ dx+ dy),
| D0 = texture2D(OGL2Texture,pC4- dx+2.*dy), // D0..D2: the row two steps along +dy
| D1 = texture2D(OGL2Texture,pC4 +2.*dy),
| D2 = texture2D(OGL2Texture,pC4+ dx+2.*dy),
| D4 = texture2D(OGL2Texture,pC4+2.*dx ), // D4, D5: two steps along +dx
| D5 = texture2D(OGL2Texture,pC4+2.*dx+ dy),
| p10,p11; // p10: result when exactly one of fp.x/fp.y is >= .5; p11: result when both are
| float c0 = reduce(C0),c1 = reduce(C1),c2 = reduce(C2),c3 = reduce(C3), // scalar keys used for all equality tests below
| c4 = reduce(C4),c5 = reduce(C5),c6 = reduce(C6),c7 = reduce(C7),
| c8 = reduce(C8),d0 = reduce(D0),d1 = reduce(D1),d2 = reduce(D2),
| d4 = reduce(D4),d5 = reduce(D5);
| if(c4 == c8){ // the C4/C8 diagonal matches
| if(c5 != c7){ // ...and the C5/C7 diagonal does not
| p10 = (c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : .5*(C4+C7);
| p11 = C4;
| }
| else{ // both diagonals match
| if(c4 == c5) p11 = (p10 = C4); // all four texels share one key: no blending needed
| else{
| float r = GET_RESULT(c4,c5,c3,c1)+GET_RESULT(c4,c5,d5,d2) // vote between C4 and C5 using the surrounding texels
| -GET_RESULT(c5,c4,d4,c2)-GET_RESULT(c5,c4,c6,d1);
| p10 = .5*(C4+C7);
| if(r > 0.) p11 = C4;
| else if(r < 0.) p11 = C5;
| else p11 = .25*(C4+C5+C7+C8); // tie: average the 2x2 block
| }
| }
| }else if(c5 == c7){ // only the C5/C7 diagonal matches
| p10 = (c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
| p11 = C5;
| }else{ // neither diagonal matches
| p11 = 0.25*(C4+C5+C7+C8);
| if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) p10 = C4;
| else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) p10 = C7;
| else p10 = 0.5*(C4+C7);
| }
| if(fp.x >= .5 && fp.y >= .5) rValue = p11; // both halves high: diagonal result
| else if(fp.x >= .5 || fp.y >= .5) rValue = p10; // exactly one half high: edge result
| else rValue = C4; // both halves low: the source texel itself
| return rValue;
| }
|
| void main(){
| gl_FragColor = xSaI();
| | } | | Fragment file WA2xSaI: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | 26: | | 27: | | 28: | | 29: | | 30: | | 31: | | 32: | | 33: | | 34: | | 35: | | 36: | | 37: | | 38: | | 39: | | 40: | | 41: | | 42: | | 43: | | 44: | | 45: | | 46: | | 47: | | 48: | | 49: | | 50: | | 51: | | 52: | | 53: | | 54: | | 55: | | 56: | | 57: | | 58: | | 59: | | 60: | | 61: | | 62: | | 63: | | 64: | | 65: | | 66: | | 67: | | 68: | | 69: | | 70: | | 71: | | 72: | | 73: | | 74: | | 75: | | 76: | | 77: | | 78: | | 79: | | 80: | | 81: | | 82: | | 83: | | 84: | | 85: | | 86: | | 87: | | 88: | | 89: | | 90: | | 91: | | 92: | | 93: | | 94: | | 95: | | 96: | | | | uniform vec4 OGL2Size, OGL2InvSize;
| uniform sampler2D OGL2Texture;
| const float pi = 1.570796326794896619231321691640;
| const vec2 pC4 = gl_TexCoord[0].xy;
| const vec2 rx = vec2(OGL2InvSize.x,0.);
| const vec2 ry = vec2(0.,OGL2InvSize.y);
| vec2 dx, dy;
| const vec4 dt = vec4(16777216.,65536.,256.,1.);
|
| float reduce(vec4);
| float GET_RESULT(float, float, float, float);
| vec4 WA2xSaI();
| vec4 TWOxSaI(vec2, vec2);
|
| // Fragment entry point: outputs the colour computed by WA2xSaI().
| void main(){
|   vec4 filtered = WA2xSaI();
|   gl_FragColor = filtered;
| }
|
| // Collapses an RGBA colour into one float key (dot product with the file-level weights `dt`)
| // so that two colours can be compared with scalar == / !=.
| float reduce(vec4 colour){
|   float key = dot(colour,dt);
|   return key;
| }
|
| // Scores candidate A against candidate B using two neighbour keys C and D:
| // +1. when C and D both equal B and neither equals A, -1. when C and D both equal A, 0. otherwise.
| float GET_RESULT(float A, float B, float C, float D){
|   bool bothMatchA = (A == C) && (A == D);
|   bool bothMatchB = (A != C) && (A != D) && (B == C) && (B == D);
|   return float(bothMatchB) - float(bothMatchA);
| }
|
| // Weighted-average 2xSaI: runs TWOxSaI() for this texel and for up to three neighbouring texels chosen
| // from the fragment's sub-texel position, then blends the four results with cosine fall-off weights.
| // Fix: the weight constructor `vec4 c = vec4(...)` had lost its closing parenthesis and did not compile.
| vec4 WA2xSaI(){
|   vec2 fp = fract(pC4*OGL2Size.xy);
|   // Sub-texel position as seen from each of the four sampled texels (taken before fp is folded below).
|   vec2 s0 = vec2(1.-fp.x,1.-fp.y), s1 = vec2(1.-fp.x,fp.y),
|        s2 = vec2(fp.x,1.-fp.y),    s3 = vec2(fp.x,fp.y);
|   // Local neighbour offsets; named so they do not hide the file-level dx/dy that TWOxSaI() overwrites.
|   vec2 stepX = vec2(OGL2InvSize.x,0.), stepY = vec2(0.,OGL2InvSize.y);
|   // First fold: mirror each axis into [0,.5] and point the offset towards the nearer neighbour.
|   if(fp.x >= .5){fp.x = 1. - fp.x; stepX = -stepX;}
|   if(fp.y >= .5){fp.y = 1. - fp.y; stepY = -stepY;}
|   fp *= 2.;
|   // Second fold: in the inner half of the texel the neighbour on that axis is dropped (offset becomes zero).
|   if(fp.x >= .5){fp.x = 1. - fp.x; stepX = vec2(0.);}
|   if(fp.y >= .5){fp.y = 1. - fp.y; stepY = vec2(0.);}
|   mat4 C = mat4(TWOxSaI(s0,pC4-stepX-stepY), TWOxSaI(s1,pC4-stepX),
|                 TWOxSaI(s2,pC4-stepY),       TWOxSaI(s3,pC4));
|   // Squared per-axis distances to the far (fp+.5) and near (fp-.5) sample on each axis.
|   vec2 sqHi = (fp+.5)*(fp+.5), sqLo = (fp-.5)*(fp-.5);
|   vec4 c = vec4(sqHi.x+sqHi.y, sqHi.x+sqLo.y,
|                 sqLo.x+sqHi.y, sqLo.x+sqLo.y);
|   // Weight = cos(pi * -min(distance, 1)). The file-level `pi` is declared as 1.5707963..., i.e. half of
|   // the mathematical pi, so the weight is 1 at distance 0 and reaches 0 at distance 1.
|   c = cos(pi*max(-sqrt(c), vec4(-1.)));
|   return (C[0]*c.x+C[1]*c.y+C[2]*c.z+C[3]*c.w)/(c.x+c.y+c.z+c.w);
| }
|
| vec4 TWOxSaI(vec2 fp, vec2 pC4){ // 2xSaI for an explicit sub-texel position fp at coordinate pC4 (this parameter hides the file-level pC4)
| vec4 rValue;
| if(fp.x < .5 || fp.y >= .5){dx = rx; dy = ry;}else{dx = ry; dy = rx;} // writes the file-level dx/dy; axes are swapped only when fp.x >= .5 and fp.y < .5
| vec4 C0 = texture2D(OGL2Texture,pC4- dx- dy), // C0..C8: the 3x3 block centred on C4
| C1 = texture2D(OGL2Texture,pC4 - dy),
| C2 = texture2D(OGL2Texture,pC4+ dx- dy),
| C3 = texture2D(OGL2Texture,pC4- dx ),
| C4 = texture2D(OGL2Texture,pC4 ),
| C5 = texture2D(OGL2Texture,pC4+ dx ),
| C6 = texture2D(OGL2Texture,pC4- dx+ dy),
| C7 = texture2D(OGL2Texture,pC4 + dy),
| C8 = texture2D(OGL2Texture,pC4+ dx+ dy),
| D0 = texture2D(OGL2Texture,pC4- dx+2.*dy), // D0..D2: the row two steps along +dy
| D1 = texture2D(OGL2Texture,pC4 +2.*dy),
| D2 = texture2D(OGL2Texture,pC4+ dx+2.*dy),
| D4 = texture2D(OGL2Texture,pC4+2.*dx ), // D4, D5: two steps along +dx
| D5 = texture2D(OGL2Texture,pC4+2.*dx+ dy),
| p10, p11; // p10: result when exactly one of fp.x/fp.y is >= .5; p11: result when both are
| float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3), // scalar keys used for all equality tests below
| c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
| c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
| d4 = reduce(D4), d5 = reduce(D5);
| if(c4 == c8){ // the C4/C8 diagonal matches
| if(c5 != c7){ // ...and the C5/C7 diagonal does not
| p10 = (c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : .5*(C4+C7);
| p11 = C4;
| }else{ // both diagonals match
| if(c4 == c5) p11 = (p10 = C4); // all four texels share one key: no blending needed
| else{
| float r = GET_RESULT(c4,c5,c3,c1)+GET_RESULT(c4,c5,d5,d2) // vote between C4 and C5 using the surrounding texels
| -GET_RESULT(c5,c4,d4,c2)-GET_RESULT(c5,c4,c6,d1);
| p10 = .5*(C4+C7);
| if(r > 0.) p11 = C4;
| else if(r < 0.) p11 = C5;
| else p11 = .25*(C4+C5+C7+C8); // tie: average the 2x2 block
| }
| }
| }else if(c5 == c7){ // only the C5/C7 diagonal matches
| p10 = (c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : .5*(C4+C7);
| p11 = C5;
| }else{ // neither diagonal matches
| p11 = .25*(C4+C5+C7+C8);
| if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) p10 = C4;
| else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) p10 = C7;
| else p10 = .5*(C4+C7);
| }
| if(fp.x >= .5 && fp.y >= .5) rValue = p11; // both halves high: diagonal result
| else if(fp.x >= .5 || fp.y >= .5) rValue = p10; // exactly one half high: edge result
| else rValue = C4; // both halves low: the source texel itself
| return rValue;
| | } | |
[Dieser Beitrag wurde am 16.05.2007 - 01:07 von VerGreeneyes aktualisiert]
|
|
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 16.05.2007 - 11:33 |  |
I guess one step back brings later two ahead or something. 
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 16.05.2007 - 12:25 |  |
So it would seem! I'm getting about 67fps with the 2xSaI shader on my laptop now; compare that to the 41fps I started out with!
Here are the filters you asked for:
Scale2x,
WAScale2x, (Weighted Average Scale2x)
Scale2xCR, (Scale2x with reduced colours)
WAScale2xCR.
WAScale2xCR is the slowest of the lot, getting about the same speed as WA2xSaI on my laptop.
Vertex file: | | | | void main(){ // pass-through vertex stage (presumably paired with each fragment listing that follows -- confirm)
| gl_Position = ftransform(); // standard fixed-function-equivalent vertex transform
| gl_TexCoord[0] = gl_MultiTexCoord0; // forward texture unit 0 coordinates; the fragment shaders read gl_TexCoord[0]
| | } | |
Fragment file Scale2x: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | | | uniform vec4 OGL2Size,OGL2Param;
| uniform sampler2D OGL2Texture;
|
| vec4 Scale2x();
|
| // Fragment entry point: outputs the colour computed by Scale2x().
| void main(){
|   vec4 filtered = Scale2x();
|   gl_FragColor = filtered;
| }
|
| // Scale2x for the current fragment: samples the centre texel and its four axis neighbours, mirrors the
| // neighbourhood according to the sub-texel quadrant, and replaces the centre with the shared corner
| // colour when the two neighbours meeting at that corner match each other but not their opposites.
| vec4 Scale2x(){
|   vec4 coord = gl_TexCoord[0];
|   vec4 stepX = vec4(OGL2Param.x,0.,0.,0.);
|   vec4 stepY = vec4(0.,OGL2Param.y,0.,0.);
|   vec4 centre = texture2DProj(OGL2Texture,coord);
|   vec4 left   = texture2DProj(OGL2Texture,coord-stepX);
|   vec4 right  = texture2DProj(OGL2Texture,coord+stepX);
|   vec4 top    = texture2DProj(OGL2Texture,coord-stepY);
|   vec4 bottom = texture2DProj(OGL2Texture,coord+stepY);
|   vec2 sub = fract(coord.xy*OGL2Size.xy);
|   // Mirror so that every quadrant can be handled by the single "-x/-y corner" test below.
|   if(sub.x >= .5){vec4 swapX = right; right = left; left = swapX;}
|   if(sub.y >= .5){vec4 swapY = top; top = bottom; bottom = swapY;}
|   bool cornerEdge = (top == left) && (top != right) && (left != bottom);
|   return cornerEdge ? left : centre;
| | } | |
Fragment file WAScale2x: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | 26: | | 27: | | 28: | | 29: | | 30: | | 31: | | 32: | | 33: | | 34: | | 35: | | 36: | | 37: | | 38: | | 39: | | 40: | | 41: | | 42: | | 43: | | 44: | | 45: | | 46: | | 47: | | | | uniform vec4 OGL2Size,OGL2Param;
| uniform sampler2D OGL2Texture;
| const float pi = 1.570796326794896619231321691640;
| const vec4 pC4 = gl_TexCoord[0];
| const vec4 rx = vec4(OGL2Param.x,0.,0.,0.);
| const vec4 ry = vec4(0.,OGL2Param.y,0.,0.);
|
| vec4 wAverage();
| vec4 Scale2x(vec2, vec4);
|
| // Fragment entry point: outputs the colour computed by wAverage().
| void main(){
|   vec4 filtered = wAverage();
|   gl_FragColor = filtered;
| }
|
| // Weighted-average Scale2x: runs Scale2x() for this texel and for up to three neighbouring texels chosen
| // from the fragment's sub-texel position, then blends the four results with cosine fall-off weights.
| // Fix: the weight constructor `vec4 c = vec4(...)` had lost its closing parenthesis and did not compile.
| vec4 wAverage(){
|   vec2 fp = fract(pC4.xy*OGL2Size.xy);
|   // Sub-texel position as seen from each of the four sampled texels (taken before fp is folded below).
|   vec2 s0 = vec2(1.-fp.x,1.-fp.y), s1 = vec2(1.-fp.x,fp.y),
|        s2 = vec2(fp.x,1.-fp.y),    s3 = vec2(fp.x,fp.y);
|   vec4 dx = rx, dy = ry;
|   // First fold: mirror each axis into [0,.5] and point the offset towards the nearer neighbour.
|   if(fp.x >= .5){dx = -dx; fp.x = 1. - fp.x;}
|   if(fp.y >= .5){dy = -dy; fp.y = 1. - fp.y;}
|   fp *= 2.;
|   // Second fold: in the inner half of the texel the neighbour on that axis is dropped (offset becomes zero).
|   if(fp.x >= .5){dx = vec4(0.); fp.x = 1. - fp.x;}
|   if(fp.y >= .5){dy = vec4(0.); fp.y = 1. - fp.y;}
|   // Squared per-axis distances to the far (fp+.5) and near (fp-.5) sample on each axis.
|   vec2 sqHi = (fp+.5)*(fp+.5), sqLo = (fp-.5)*(fp-.5);
|   vec4 c = vec4(sqHi.x+sqHi.y, sqHi.x+sqLo.y,
|                 sqLo.x+sqHi.y, sqLo.x+sqLo.y);
|   mat4 C = mat4(Scale2x(s0,pC4-dx-dy), Scale2x(s1,pC4-dx),
|                 Scale2x(s2,pC4-dy),    Scale2x(s3,pC4));
|   // Weight = cos(pi * -min(distance, 1)). The file-level `pi` is declared as 1.5707963..., i.e. half of
|   // the mathematical pi, so the weight is 1 at distance 0 and reaches 0 at distance 1.
|   c = cos(pi*max(-sqrt(c), vec4(-1.)));
|   return (C[0]*c.x+C[1]*c.y+C[2]*c.z+C[3]*c.w)/(c.x+c.y+c.z+c.w);
| }
|
| // Scale2x for an explicit sub-texel position `fp` at coordinate `pC0`: samples the centre texel and its
| // four axis neighbours (offsets rx/ry), mirrors them by quadrant, and returns the shared corner colour
| // when the two neighbours meeting at that corner match each other but not their opposites.
| vec4 Scale2x(vec2 fp, vec4 pC0){
|   vec4 centre = texture2DProj(OGL2Texture,pC0);
|   vec4 left   = texture2DProj(OGL2Texture,pC0-rx);
|   vec4 right  = texture2DProj(OGL2Texture,pC0+rx);
|   vec4 top    = texture2DProj(OGL2Texture,pC0-ry);
|   vec4 bottom = texture2DProj(OGL2Texture,pC0+ry);
|   // Mirror so that every quadrant can be handled by the single "-x/-y corner" test below.
|   if(fp.x >= .5){vec4 swapX = right; right = left; left = swapX;}
|   if(fp.y >= .5){vec4 swapY = top; top = bottom; bottom = swapY;}
|   bool cornerEdge = (top == left) && (top != right) && (left != bottom);
|   return cornerEdge ? left : centre;
| | } | |
Fragment file Scale2xCR: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | 26: | | 27: | | | | uniform vec4 OGL2Size,OGL2Param;
| uniform sampler2D OGL2Texture;
| const vec4 pl = vec4(8.);
| const vec4 vec5 = vec4(.5);
|
| vec4 Scale2x();
|
| // Fragment entry point: outputs the colour computed by Scale2x().
| void main(){
|   vec4 filtered = Scale2x();
|   gl_FragColor = filtered;
| }
|
| // Scale2x on colour-reduced samples: each sample is square-rooted and rounded to the nearest multiple of
| // 1/pl per channel before the Scale2x comparison; the chosen value is squared again on return.
| vec4 Scale2x(){
|   vec4 coord = gl_TexCoord[0];
|   vec4 stepX = vec4(OGL2Param.x,0.,0.,0.);
|   vec4 stepY = vec4(0.,OGL2Param.y,0.,0.);
|   vec4 centre = floor(pl*sqrt(texture2DProj(OGL2Texture,coord))+vec5)/pl;
|   vec4 left   = floor(pl*sqrt(texture2DProj(OGL2Texture,coord-stepX))+vec5)/pl;
|   vec4 right  = floor(pl*sqrt(texture2DProj(OGL2Texture,coord+stepX))+vec5)/pl;
|   vec4 top    = floor(pl*sqrt(texture2DProj(OGL2Texture,coord-stepY))+vec5)/pl;
|   vec4 bottom = floor(pl*sqrt(texture2DProj(OGL2Texture,coord+stepY))+vec5)/pl;
|   vec2 sub = fract(coord.xy*OGL2Size.xy);
|   // Mirror so that every quadrant can be handled by the single "-x/-y corner" test below.
|   if(sub.x >= .5){vec4 swapX = right; right = left; left = swapX;}
|   if(sub.y >= .5){vec4 swapY = top; top = bottom; bottom = swapY;}
|   bool cornerEdge = (top == left) && (top != right) && (left != bottom);
|   vec4 chosen = cornerEdge ? left : centre;
|   return chosen*chosen;
| | } | |
Fragment file WAScale2xCR: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | 26: | | 27: | | 28: | | 29: | | 30: | | 31: | | 32: | | 33: | | 34: | | 35: | | 36: | | 37: | | 38: | | 39: | | 40: | | 41: | | 42: | | 43: | | 44: | | 45: | | 46: | | 47: | | 48: | | 49: | | | | uniform vec4 OGL2Size,OGL2Param;
| uniform sampler2D OGL2Texture;
| const float pi = 1.570796326794896619231321691640;
| const vec4 pC4 = gl_TexCoord[0];
| const vec4 pl = vec4(8.);
| const vec4 vec5 = vec4(.5);
| const vec4 rx = vec4(OGL2Param.x,0.,0.,0.);
| const vec4 ry = vec4(0.,OGL2Param.y,0.,0.);
|
| vec4 wAverage();
| vec4 Scale2x(vec2, vec4);
|
| // Fragment entry point: outputs the colour computed by wAverage().
| void main(){
|   vec4 filtered = wAverage();
|   gl_FragColor = filtered;
| }
|
| // Weighted-average Scale2xCR: runs the colour-reduced Scale2x() for this texel and for up to three
| // neighbouring texels chosen from the fragment's sub-texel position, then blends the four results with
| // cosine fall-off weights.
| // Fix: the weight constructor `vec4 c = vec4(...)` had lost its closing parenthesis and did not compile.
| vec4 wAverage(){
|   vec2 fp = fract(pC4.xy*OGL2Size.xy);
|   // Sub-texel position as seen from each of the four sampled texels (taken before fp is folded below).
|   vec2 s0 = vec2(1.-fp.x,1.-fp.y), s1 = vec2(1.-fp.x,fp.y),
|        s2 = vec2(fp.x,1.-fp.y),    s3 = vec2(fp.x,fp.y);
|   vec4 dx = rx, dy = ry;
|   // First fold: mirror each axis into [0,.5] and point the offset towards the nearer neighbour.
|   if(fp.x >= .5){dx = -dx; fp.x = 1. - fp.x;}
|   if(fp.y >= .5){dy = -dy; fp.y = 1. - fp.y;}
|   fp *= 2.;
|   // Second fold: in the inner half of the texel the neighbour on that axis is dropped (offset becomes zero).
|   if(fp.x >= .5){dx = vec4(0.); fp.x = 1. - fp.x;}
|   if(fp.y >= .5){dy = vec4(0.); fp.y = 1. - fp.y;}
|   // Squared per-axis distances to the far (fp+.5) and near (fp-.5) sample on each axis.
|   vec2 sqHi = (fp+.5)*(fp+.5), sqLo = (fp-.5)*(fp-.5);
|   vec4 c = vec4(sqHi.x+sqHi.y, sqHi.x+sqLo.y,
|                 sqLo.x+sqHi.y, sqLo.x+sqLo.y);
|   mat4 C = mat4(Scale2x(s0,pC4-dx-dy), Scale2x(s1,pC4-dx),
|                 Scale2x(s2,pC4-dy),    Scale2x(s3,pC4));
|   // Weight = cos(pi * -min(distance, 1)). The file-level `pi` is declared as 1.5707963..., i.e. half of
|   // the mathematical pi, so the weight is 1 at distance 0 and reaches 0 at distance 1.
|   c = cos(pi*max(-sqrt(c), vec4(-1.)));
|   return (C[0]*c.x+C[1]*c.y+C[2]*c.z+C[3]*c.w)/(c.x+c.y+c.z+c.w);
| }
|
| // Colour-reduced Scale2x for an explicit sub-texel position `fp` at coordinate `pC0`: each sample is
| // square-rooted and rounded to the nearest multiple of 1/pl per channel before the Scale2x comparison;
| // the chosen value is squared again on return.
| vec4 Scale2x(vec2 fp, vec4 pC0){
|   vec4 centre = floor(pl*sqrt(texture2DProj(OGL2Texture,pC0))+vec5)/pl;
|   vec4 left   = floor(pl*sqrt(texture2DProj(OGL2Texture,pC0-rx))+vec5)/pl;
|   vec4 right  = floor(pl*sqrt(texture2DProj(OGL2Texture,pC0+rx))+vec5)/pl;
|   vec4 top    = floor(pl*sqrt(texture2DProj(OGL2Texture,pC0-ry))+vec5)/pl;
|   vec4 bottom = floor(pl*sqrt(texture2DProj(OGL2Texture,pC0+ry))+vec5)/pl;
|   // Mirror so that every quadrant can be handled by the single "-x/-y corner" test below.
|   if(fp.x >= .5){vec4 swapX = right; right = left; left = swapX;}
|   if(fp.y >= .5){vec4 swapY = top; top = bottom; bottom = swapY;}
|   bool cornerEdge = (top == left) && (top != right) && (left != bottom);
|   vec4 chosen = cornerEdge ? left : centre;
|   return chosen*chosen;
| | } | |
[Dieser Beitrag wurde am 16.05.2007 - 13:06 von VerGreeneyes aktualisiert]
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 16.05.2007 - 15:10 |  |
Nice work.
Sure it will look nicer when the AWF gets improved.
[Dieser Beitrag wurde am 16.05.2007 - 15:25 von guest aktualisiert]
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 16.05.2007 - 16:28 |  |
It'd certainly be slower! The Weighted Average filter (which I made up, though the concept is nothing new aside possibly from using cosine) is pretty simple right now. Any edge detection I build in is going to kill the speed.
Guest, could you try the following Fragment file? It cuts the amount of texture lookups used by a fourth, though it's a bit hackish and slower.
Fragment file: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | 26: | | 27: | | 28: | | 29: | | 30: | | 31: | | 32: | | 33: | | 34: | | 35: | | 36: | | 37: | | 38: | | 39: | | 40: | | 41: | | 42: | | 43: | | 44: | | 45: | | 46: | | 47: | | 48: | | 49: | | 50: | | 51: | | 52: | | 53: | | 54: | | 55: | | 56: | | 57: | | 58: | | 59: | | 60: | | 61: | | 62: | | 63: | | 64: | | 65: | | 66: | | 67: | | 68: | | 69: | | 70: | | 71: | | 72: | | 73: | | 74: | | 75: | | 76: | | 77: | | 78: | | 79: | | 80: | | 81: | | 82: | | 83: | | 84: | | 85: | | 86: | | 87: | | 88: | | 89: | | 90: | | 91: | | 92: | | 93: | | 94: | | 95: | | 96: | | 97: | | 98: | | 99: | | 100: | | 101: | | 102: | | 103: | | 104: | | 105: | | | | uniform vec4 OGL2Size, OGL2InvSize;
| uniform sampler2D OGL2Texture;
| const float pi = 1.570796326794896619231321691640;
| const vec2 pC4 = gl_TexCoord[0].xy;
| const vec2 rx = vec2(OGL2InvSize.x,0.);
| const vec2 ry = vec2(0.,OGL2InvSize.y);
| vec2 dx, dy;
| const vec4 dt = vec4(16777216.,65536.,256.,1.);
|
| float reduce(vec4);
| float GET_RESULT(float, float, float, float);
| vec4 WA2xSaI();
| vec4 TWOxSaI(vec2, vec2);
|
| // Fragment entry point: outputs the colour computed by WA2xSaI().
| void main(){
|   vec4 filtered = WA2xSaI();
|   gl_FragColor = filtered;
| }
|
| // Collapses an RGBA colour into one float key (dot product with the file-level weights `dt`)
| // so that two colours can be compared with scalar == / !=.
| float reduce(vec4 colour){
|   float key = dot(colour,dt);
|   return key;
| }
|
| // Scores candidate A against candidate B using two neighbour keys C and D:
| // +1. when C and D both equal B and neither equals A, -1. when C and D both equal A, 0. otherwise.
| float GET_RESULT(float A, float B, float C, float D){
|   bool bothMatchA = (A == C) && (A == D);
|   bool bothMatchB = (A != C) && (A != D) && (B == C) && (B == D);
|   return float(bothMatchB) - float(bothMatchA);
| }
|
| // Weighted-average 2xSaI: runs TWOxSaI() for this texel and for up to three neighbouring texels chosen
| // from the fragment's sub-texel position, then blends the four results with cosine fall-off weights.
| // Fix: the weight constructor `vec4 c = vec4(...)` had lost its closing parenthesis and did not compile.
| vec4 WA2xSaI(){
|   vec2 fp = fract(pC4*OGL2Size.xy);
|   // Sub-texel position as seen from each of the four sampled texels (taken before fp is folded below).
|   vec2 s0 = vec2(1.-fp.x,1.-fp.y), s1 = vec2(1.-fp.x,fp.y),
|        s2 = vec2(fp.x,1.-fp.y),    s3 = vec2(fp.x,fp.y);
|   // Local neighbour offsets; named so they do not hide the file-level dx/dy that TWOxSaI() overwrites.
|   vec2 stepX = vec2(OGL2InvSize.x,0.), stepY = vec2(0.,OGL2InvSize.y);
|   // First fold: mirror each axis into [0,.5] and point the offset towards the nearer neighbour.
|   if(fp.x >= .5){fp.x = 1. - fp.x; stepX = -stepX;}
|   if(fp.y >= .5){fp.y = 1. - fp.y; stepY = -stepY;}
|   fp *= 2.;
|   // Second fold: in the inner half of the texel the neighbour on that axis is dropped (offset becomes zero).
|   if(fp.x >= .5){fp.x = 1. - fp.x; stepX = vec2(0.);}
|   if(fp.y >= .5){fp.y = 1. - fp.y; stepY = vec2(0.);}
|   mat4 C = mat4(TWOxSaI(s0,pC4-stepX-stepY), TWOxSaI(s1,pC4-stepX),
|                 TWOxSaI(s2,pC4-stepY),       TWOxSaI(s3,pC4));
|   // Squared per-axis distances to the far (fp+.5) and near (fp-.5) sample on each axis.
|   vec2 sqHi = (fp+.5)*(fp+.5), sqLo = (fp-.5)*(fp-.5);
|   vec4 c = vec4(sqHi.x+sqHi.y, sqHi.x+sqLo.y,
|                 sqLo.x+sqHi.y, sqLo.x+sqLo.y);
|   // Weight = cos(pi * -min(distance, 1)). The file-level `pi` is declared as 1.5707963..., i.e. half of
|   // the mathematical pi, so the weight is 1 at distance 0 and reaches 0 at distance 1.
|   c = cos(pi*max(-sqrt(c), vec4(-1.)));
|   return (C[0]*c.x+C[1]*c.y+C[2]*c.z+C[3]*c.w)/(c.x+c.y+c.z+c.w);
| }
|
| // 2xSaI for an explicit sub-texel position `fp` at coordinate `pC4` (this parameter hides the file-level pC4).
| // When both halves of fp are low the result is simply the centre texel, so that case returns before the
| // 14-sample neighbourhood is fetched.
| // Fix: the body used to be wrapped in `while(1){ ... break; }` purely as a forward jump. GLSL requires a
| // boolean loop condition, so `while(1)` is rejected by strict compilers; an early return needs no loop.
| vec4 TWOxSaI(vec2 fp, vec2 pC4){
|   if(fp.x < .5 && fp.y < .5) return texture2D(OGL2Texture,pC4);
|   // Writes the file-level dx/dy; the axes are swapped only when fp.x >= .5 and fp.y < .5.
|   if(fp.x < .5 || fp.y >= .5){dx = rx; dy = ry;}else{dx = ry; dy = rx;}
|   // C0..C8: 3x3 block centred on C4; D0..D2: row two steps along +dy; D4, D5: two steps along +dx.
|   vec4 C0 = texture2D(OGL2Texture,pC4-dx-dy),
|        C1 = texture2D(OGL2Texture,pC4-dy),
|        C2 = texture2D(OGL2Texture,pC4+dx-dy),
|        C3 = texture2D(OGL2Texture,pC4-dx),
|        C4 = texture2D(OGL2Texture,pC4),
|        C5 = texture2D(OGL2Texture,pC4+dx),
|        C6 = texture2D(OGL2Texture,pC4-dx+dy),
|        C7 = texture2D(OGL2Texture,pC4+dy),
|        C8 = texture2D(OGL2Texture,pC4+dx+dy),
|        D0 = texture2D(OGL2Texture,pC4-dx+2.*dy),
|        D1 = texture2D(OGL2Texture,pC4+2.*dy),
|        D2 = texture2D(OGL2Texture,pC4+dx+2.*dy),
|        D4 = texture2D(OGL2Texture,pC4+2.*dx),
|        D5 = texture2D(OGL2Texture,pC4+2.*dx+dy);
|   // Scalar keys used for all equality tests; r holds the C4-versus-C5 vote when it is needed.
|   float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3),
|         c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
|         c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
|         d4 = reduce(D4), d5 = reduce(D5), r;
|   // In every branch, (fp.x < .5 || fp.y < .5) selects the "exactly one half high" result and its
|   // alternative is the "both halves high" result (the "both low" case has already returned above).
|   return (c4 == c8)
|     ? (c5 != c7)
|       ? (fp.x < .5 || fp.y < .5)
|         ? (c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
|           ? C4
|           : .5*(C4+C7)
|         : C4
|       : (c4 == c5)
|         ? C4
|         : (fp.x < .5 || fp.y < .5)
|           ? .5*(C4+C7)
|           : ((r = GET_RESULT(c4,c5,c3,c1)+GET_RESULT(c4,c5,d5,d2)-GET_RESULT(c5,c4,d4,c2)-GET_RESULT(c5,c4,c6,d1)) > 0.)
|             ? C4
|             : (r < 0.)
|               ? C5
|               : .25*(C4+C5+C7+C8)
|     : (c5 == c7)
|       ? (fp.x < .5 || fp.y < .5)
|         ? (c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
|           ? C7
|           : .5*(C4+C7)
|         : C5
|       : (fp.x < .5 || fp.y < .5)
|         ? (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
|           ? C4
|           : (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
|             ? C7
|             : .5*(C4+C7)
|         : .25*(C4+C5+C7+C8);
| }
| | |
[Dieser Beitrag wurde am 16.05.2007 - 18:17 von VerGreeneyes aktualisiert]
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 16.05.2007 - 21:14 |  |
*bumps topic so edited post will be noticed*
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 17.05.2007 - 04:34 |  |
Btw. u could post the "clean" faster 2xSaI ver.
PS: "My" compiler seems to require "true" instead of "1"...
[Dieser Beitrag wurde am 17.05.2007 - 11:49 von guest aktualisiert]
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 17.05.2007 - 13:18 |  |
Heh, I guess I'm a bit bent on getting this working for everyone. Still hoping to reduce the amount of texture lookups without hackish behaviour, but I haven't got any good ideas ATM. So does this version work for you?
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 17.05.2007 - 15:03 |  |
It produces a "very old" adapter state which i had last with my radeon 9600.
Black screen, 0.1 FPS, have to reboot to gain normal screen scrolling...but the app doesn't crash.
Sure u can do "shorter". 
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 17.05.2007 - 15:32 |  |
Merp, I was expecting something like 12 fps going by your earlier figures.. guess ATI hardware and software is really different.
|