| Author | Topics » Book an abo for this thread |  |
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 07.05.2007 - 13:08 |  |
Texel map:
// C0 C1 C2 D3
// C3 C4 C5 D4
// C6 C7 C8 D5
// D0 D1 D2 D6
The Scale2x algorithm is left-to-right and top-to-bottom symmetric, as I recall, and the GLSL port uses this feature to apply a simplified algorithm (it replaces some input colors on the way) depending on the fractional position.
It could be done similar with the 2xSaI algorithm. As it can be seen the "texel" lookups are extended on right and bottom sides (C4 is the central "texel" - points to up-left/bottom-right or "y=-x" symmetry axis)
If the algorithm turns out to be symmetric, the same procedure (with different color data) can be used to calculate p01 and p10.
"Product mix" or better expressed "Smooth pattern transitions":
The 2xSaI shader, as it is, branches through the patterns based on color equalities. PSX games use things such as texturing, palettized textures, transparency
... Some texels can turn out to be very alike, but not the same - and the algorithm chooses to ignore these similarities. "Irregular artifacts" are produced this way...
[Dieser Beitrag wurde am 07.05.2007 - 13:47 von guest aktualisiert]
|
|
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 07.05.2007 - 14:42 |  |
Hmm, so the product mix is a quality issue rather than a speed one? We could try declaring colours equal if the difference between them is smaller than, say, 1%.. what are the minimum and maximum values for c0-c8 and d0-d6?
The symmetry 'issue' is an interesting one I may look into later. I'll need a better understanding of the algorithm for it though. (which may take some time)
Finally, here's the most recent version of my fragment file:
| Code:
| const vec3 dt = vec3(65536.0,256.0,1.0);
|
| // Collapse an RGB colour into one float (weighted sum with dt) so that two
| // colours can be compared with a single scalar == / != in main().
| float reduce(vec3 color){
|   float key = dot(color, dt);
|   return key;
| }
|
| uniform vec4 OGL2Size; // .xy scales gl_TexCoord[0].xy before fract() in main(); presumably the texture size in texels - TODO confirm
| uniform vec4 OGL2InvSize; // .x/.y are used in main() as the step to a neighbouring texel; presumably 1/size - TODO confirm
| uniform sampler2D OGL2Texture; // source image read with texture2D() in main()
|
| // 2xSaI fragment stage: chooses gl_FragColor.xyz from the 4x4 texel neighbourhood
| //   C0 C1 C2 D3
| //   C3 C4 C5 D4
| //   C6 C7 C8 D5
| //   D0 D1 D2 D6
| // where C4 is the texel sampled at gl_TexCoord[0].xy. fp selects the quadrant of C4:
| // fp.x < 0.5 && fp.y < 0.5 always yields C4; the other three quadrants yield either a
| // source colour or a blend, depending on which neighbours compare equal.
| // gl_FragColor.w is not written by this shader.
| void main(){
| // fp: fractional part of the scaled texture coordinate, used to pick the quadrant.
| vec2 fp = fract(gl_TexCoord[0].xy*OGL2Size.xy),
| dx = vec2(OGL2InvSize.x,0.0 ),
| dy = vec2(0.0 ,OGL2InvSize.y),
| g1 = vec2( OGL2InvSize.x,OGL2InvSize.y),
| g2 = vec2(-OGL2InvSize.x,OGL2InvSize.y),
| pC4 = gl_TexCoord[0].xy, pC8 = pC4 + g1;
| vec3 C0 = texture2D(OGL2Texture,pC4-g1 ).xyz,
| C1 = texture2D(OGL2Texture,pC4 -dy).xyz,
| C2 = texture2D(OGL2Texture,pC4-g2 ).xyz,
| D3 = texture2D(OGL2Texture,pC4-g2+dx).xyz,
| C3 = texture2D(OGL2Texture,pC4 -dx).xyz,
| C4 = texture2D(OGL2Texture,pC4 ).xyz,
| C5 = texture2D(OGL2Texture,pC4 +dx).xyz,
| D4 = texture2D(OGL2Texture,pC8-g2 ).xyz,
| C6 = texture2D(OGL2Texture,pC4+g2 ).xyz,
| C7 = texture2D(OGL2Texture,pC4 +dy).xyz,
| C8 = texture2D(OGL2Texture,pC8 ).xyz,
| D5 = texture2D(OGL2Texture,pC8 +dx).xyz,
| D0 = texture2D(OGL2Texture,pC4+g2+dy).xyz,
| D1 = texture2D(OGL2Texture,pC8+g2 ).xyz,
| D2 = texture2D(OGL2Texture,pC8 +dy).xyz,
| D6 = texture2D(OGL2Texture,pC8+g1 ).xyz;
| // Scalar keys for the equality tests below (d6 is computed but not referenced by any test).
| float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3),
| c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
| c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
| d3 = reduce(D3), d4 = reduce(D4), d5 = reduce(D5), d6 = reduce(D6);
| // Case 1: only the C4-C8 diagonal matches.
| if(c4 == c8 && c5 != c7)
| gl_FragColor.xyz = (fp.x < 0.5)
| ? (fp.y < 0.5) ? C4
| : ((c4 == c3 && c7 == d2) || (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)) ? C4 : 0.5*(C4+C7)
| : (fp.y < 0.5) ? ((c4 == c1 && c5 == d5) || (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)) ? C4 : 0.5*(C4+C5)
| : C4;
| // Case 2: only the C5-C7 diagonal matches.
| else if(c5 == c7 && c4 != c8)
| gl_FragColor.xyz = (fp.x < 0.5)
| ? (fp.y < 0.5) ? C4
| : ((c7 == c6 && c4 == c2) || (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)) ? C7 : 0.5*(C4+C7)
| : (fp.y < 0.5) ? ((c5 == c2 && c4 == c6) || (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)) ? C5 : 0.5*(C4+C5)
| : C5;
| // Case 3: both diagonals match.
| else if(c4 == c8 && c5 == c7){
| if(c4 == c5) gl_FragColor.xyz = C4;
| else{
| if(fp.x < 0.5){
| if(fp.y < 0.5) gl_FragColor.xyz = C4;
| else gl_FragColor.xyz = 0.5*(C4+C7);
| }else{
| if(fp.y < 0.5) gl_FragColor.xyz = 0.5*(C4+C5);
| else{
| // Each sign() term is 0 when both listed neighbours equal the colour and 1 otherwise,
| // so r > 0 means C5 has more fully matching neighbour pairs than C4 (and C4 is chosen).
| float r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))+sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
| -sign(abs(c5-d4)+abs(c5-c2))-sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2));
| gl_FragColor.xyz = (r == 0.0) ? 0.25*(C4+C5+C7+C8) : (r > 0.0) ? C4 : C5;
| }
| }
| }
| // Case 4: neither diagonal matches.
| }else{
| if(fp.x < 0.5){
| if(fp.y < 0.5) gl_FragColor.xyz = C4;
| else gl_FragColor.xyz = (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
| }else{
| // Fixed: the last comparison of the first test is against d3 (the texel right of C2),
| // the same pattern test used in case 1 above; it previously read c3.
| if(fp.y < 0.5) gl_FragColor.xyz = (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3) ? C4 : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0) ? C5 : 0.5*(C4+C5);
| else gl_FragColor.xyz = 0.25*(C4+C5+C7+C8);
| }
| }
| | } | |
It's a bit faster again (for me anyway, all this seems very driver- and card-dependent), and I fixed something that may affect quality.
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 07.05.2007 - 15:53 |  |
Yes, smooth pattern transitions are a quality issue.
A classic approach to re-code a shader would be to sum the color differences, search for the minimal and maximal "difference", "weight" the product candidates for a single product (for example p01) and blend/mix them accordingly.
Adding a "threshold value" for "equality" is tricky since the algorithm is advised to stay non-contradictive and uniform. with a high threshold value some branches would become more dominant etc...
But if so, the color differences should be calculated instead and the if statements rearranged like "if (dif(C4,C8) <= threshold)..." instead of "if (c4==c8)..."
[Dieser Beitrag wurde am 07.05.2007 - 16:12 von guest aktualisiert]
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 08.05.2007 - 01:19 |  |
I've been studying the documentation of the shader language this evening, and I'm getting a better grasp of it.
Here's my current fragment file, just some minor syntactic tweaks and some optimisation in getting the values (for a tiny speedup):
| Code:
| const vec3 dt = vec3(65536.,256.,1.);
| // Texture coordinate of the centre texel C4, used by set() and main().
| // Defined as a macro because a GLSL const must be initialised from a constant
| // expression and gl_TexCoord[0] is a per-fragment varying, so the former
| // "const vec2 pC4 = gl_TexCoord[0].xy;" is rejected by spec-conforming compilers.
| #define pC4 (gl_TexCoord[0].xy)
| uniform vec4 OGL2Size; // .xy scales pC4 before fract() in main(); presumably the texture size in texels - TODO confirm
| uniform vec4 OGL2InvSize; // .x/.y are used in main() as the step to a neighbouring texel; presumably 1/size - TODO confirm
| uniform sampler2D OGL2Texture; // source image read with texture2D() in set()
|
| // Return the RGB of OGL2Texture sampled at pC4 displaced by coord.
| vec3 set(vec2 coord){
|   vec2 samplePos = pC4 + coord;
|   vec4 texel = texture2D(OGL2Texture, samplePos);
|   return texel.xyz;
| }
|
| // Collapse an RGB colour into one float (weighted sum with dt) so that two
| // colours can be compared with a single scalar == / != in main().
| float reduce(vec3 color){
|   float key = dot(color, dt);
|   return key;
| }
|
| // 2xSaI fragment stage: chooses gl_FragColor.xyz from the 4x4 texel neighbourhood
| //   C0 C1 C2 D3
| //   C3 C4 C5 D4
| //   C6 C7 C8 D5
| //   D0 D1 D2 D6
| // where C4 is the texel sampled at pC4. fp selects the quadrant of C4:
| // fp.x < 0.5 && fp.y < 0.5 always yields C4; the other three quadrants yield either a
| // source colour or a blend, depending on which neighbours compare equal.
| // gl_FragColor.w is not written by this shader.
| void main(){
| vec2 dx = vec2( OGL2InvSize.x,0. ),
| dy = vec2( 0. ,OGL2InvSize.y),
| g1 = vec2( OGL2InvSize.x,OGL2InvSize.y),
| g2 = vec2(-OGL2InvSize.x,OGL2InvSize.y),
| fp = fract(pC4*OGL2Size.xy);
| // set(offset) samples the texel at pC4 + offset.
| vec3 C0 = set(-g1), C1 = set(-dy), C2 = set(-g2), C3 = set(-dx),
| C4 = set(vec2(0.)), C5 = set(dx), C6 = set(g2), C7 = set(dy),
| C8 = set(g1), D0 = set(g2+dy), D1 = set(g1+g2), D2 = set(g1+dy),
| D3 = set(-g2+dx), D4 = set(g1-g2), D5 = set(g1+dx), D6 = set(g1+g1);
| // Scalar keys for the equality tests below (d6 is computed but not referenced by any test).
| float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3),
| c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
| c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
| d3 = reduce(D3), d4 = reduce(D4), d5 = reduce(D5), d6 = reduce(D6);
| // Case 1: only the C4-C8 diagonal matches.
| if(c4 == c8 && c5 != c7)
| gl_FragColor.xyz = (fp.x < 0.5)
| ? (fp.y < 0.5) ? C4
| : ((c4 == c3 && c7 == d2) || (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)) ? C4 : 0.5*(C4+C7)
| : (fp.y < 0.5) ? ((c4 == c1 && c5 == d5) || (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)) ? C4 : 0.5*(C4+C5)
| : C4;
| // Case 2: only the C5-C7 diagonal matches.
| else if(c5 == c7 && c4 != c8)
| gl_FragColor.xyz = (fp.x < 0.5)
| ? (fp.y < 0.5) ? C4
| : ((c7 == c6 && c4 == c2) || (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)) ? C7 : 0.5*(C4+C7)
| : (fp.y < 0.5) ? ((c5 == c2 && c4 == c6) || (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)) ? C5 : 0.5*(C4+C5)
| : C5;
| // Case 3: both diagonals match.
| else if(c4 == c8 && c5 == c7){
| if(c4 == c5) gl_FragColor.xyz = C4;
| else{
| if(fp.x < 0.5){
| if(fp.y < 0.5) gl_FragColor.xyz = C4;
| else gl_FragColor.xyz = 0.5*(C4+C7);
| }else{
| if(fp.y < 0.5) gl_FragColor.xyz = 0.5*(C4+C5);
| else{
| // Each sign() term is 0 when both listed neighbours equal the colour and 1 otherwise,
| // so r > 0 means C5 has more fully matching neighbour pairs than C4 (and C4 is chosen).
| float r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))+sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
| -sign(abs(c5-d4)+abs(c5-c2))-sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2));
| gl_FragColor.xyz = (r == 0.) ? 0.25*(C4+C5+C7+C8) : (r > 0.) ? C4 : C5;
| }
| }
| }
| // Case 4: neither diagonal matches.
| }else{
| if(fp.x < 0.5){
| if(fp.y < 0.5) gl_FragColor.xyz = C4;
| else gl_FragColor.xyz = (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
| }else{
| // Fixed: the last comparison of the first test is against d3 (the texel right of C2),
| // the same pattern test used in case 1 above; it previously read c3.
| if(fp.y < 0.5) gl_FragColor.xyz = (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3) ? C4 : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0) ? C5 : 0.5*(C4+C5);
| else gl_FragColor.xyz = 0.25*(C4+C5+C7+C8);
| }
| }
| | } | |
I hope I'll be able to do some more major things once I get further through the document.
Edit: hah, this version is actually slower on my desktop's 6600GT.. well, this is getting silly, I won't be optimising for it anymore, but rather for my laptop's Go 7700 (which should better reflect modern cards and can get much better speed). Currently getting between 44 and 45 fps on it, hope I can get it up to 60 somehow!
[Dieser Beitrag wurde am 08.05.2007 - 01:31 von VerGreeneyes aktualisiert]
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 08.05.2007 - 20:29 |  |
Hey Ver!
I exploited the symmetry feat. 
Seems to run bit faster...
Regards, guest.r
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 08.05.2007 - 21:07 |  |
Cool! From what I'd seen of the filter I wasn't sure there was one, so I'm eager to see what you changed. Off to test it (and mess with it) now!
Edit: a quick question. Do you know if the latest ATI drivers support 'conditional returns'? | | | | | if(fp.x < .5 && fp.y < .5) return; | |
[Dieser Beitrag wurde am 09.05.2007 - 00:06 von VerGreeneyes aktualisiert]
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 09.05.2007 - 09:17 |  |
It's a more appropriate question whether conditional returns are included in GLSL.
I'm still reading the PDF I downloaded in 2004.
Once i stated a capable polynomial series engine is everything a capable coder needs.
...
Everything else is basicly non-base stuff.
[Dieser Beitrag wurde am 09.05.2007 - 13:29 von guest aktualisiert]
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 09.05.2007 - 09:24 |  |
I've seen nothing to indicate that they're included, but who would use an unconditional return in a void function? Anyway, I've found that while the nvidia ForceWare 94.20 drivers don't support them, 158.22 and 165.01 (which are still in beta) -do-. The ATI driver scene is a lot simpler to check though.
|
VerGreeneyes Strong supporter
 

Status:Offline Date registered: 26.04.2007 Post:89 Send Message | Created on 09.05.2007 - 13:52 |  |

It's by no means constant (it'll go as low *cough* as 58fps) but the magic number has been reached!
vertex file:
| | | | void main(){ // vertex stage: fixed-function transform plus texture-coordinate pass-through
| gl_Position = ftransform(); // position as the fixed-function pipeline would compute it
| gl_TexCoord[0] = gl_MultiTexCoord0; // forwarded unchanged; the fragment shader reads gl_TexCoord[0].xy
| | } | | fragment file: | Code: | | 1: | | 2: | | 3: | | 4: | | 5: | | 6: | | 7: | | 8: | | 9: | | 10: | | 11: | | 12: | | 13: | | 14: | | 15: | | 16: | | 17: | | 18: | | 19: | | 20: | | 21: | | 22: | | 23: | | 24: | | 25: | | 26: | | 27: | | 28: | | 29: | | 30: | | 31: | | 32: | | 33: | | 34: | | 35: | | 36: | | 37: | | 38: | | 39: | | 40: | | 41: | | 42: | | 43: | | 44: | | 45: | | 46: | | 47: | | 48: | | 49: | | 50: | | 51: | | 52: | | 53: | | 54: | | 55: | | 56: | | 57: | | 58: | | 59: | | 60: | | 61: | | 62: | | 63: | | 64: | | 65: | | 66: | | 67: | | 68: | | 69: | | 70: | | 71: | | 72: | | 73: | | 74: | | 75: | | 76: | | 77: | | 78: | | 79: | | | | const vec3 dt = vec3(65536.,256.,1.);
| uniform vec4 OGL2Size, OGL2InvSize; // OGL2Size.xy scales the texture coordinate before fract(); OGL2InvSize.x/.y are the texel steps used in main() - presumably size and 1/size, TODO confirm
| uniform sampler2D OGL2Texture; // source image read with texture2D() in main()
|
| // Collapse an RGB colour into one float (weighted sum with dt) so that two
| // colours can be compared with a single scalar == / != in main().
| float reduce(vec3 color){
|   float key = dot(color, dt);
|   return key;
| }
|
| // 2xSaI fragment stage using the algorithm's diagonal symmetry. The texel
| // neighbourhood is
| //   C0 C1 C2 D3
| //   C3 C4 C5 D4
| //   C6 C7 C8 D5
| //   D0 D1 D2 D6
| // with C4 sampled at gl_TexCoord[0].xy. For the quadrant fp.x < .5 && fp.y >= .5 the
| // sampling offsets are mirrored (g2 is negated, which swaps g3 and g4), so the same
| // colour tests serve both the fp.x >= .5 / fp.y < .5 quadrant and its mirror image.
| // gl_FragColor.w is not written by this shader.
| void main(){
| vec2 pC4 = gl_TexCoord[0].xy, fp = fract(pC4*OGL2Size.xy),
| dx = vec2( OGL2InvSize.x,0. ), dy = vec2( 0. ,OGL2InvSize.y),
| g1 = vec2( OGL2InvSize.x,OGL2InvSize.y), g2 = vec2(-OGL2InvSize.x,OGL2InvSize.y);
| if(fp.x < .5 && fp.y >= .5) g2*=-1.;
| // Unmirrored: g3 == dy and g4 == dx. Mirrored: g3 == dx and g4 == dy.
| vec2 g3 = .5*(g1+g2), g4 = .5*(g1-g2);
| // Fixed: D2 is sampled at pC4+g1+g3 (one step past C8 along g3), consistent with
| // D0 = g2+g3, D3 = -g2+g4 and D5 = g1+g4; it previously used pC4+1.5*g1, a diagonal
| // half-texel offset that does not address the D2 texel.
| vec3 C0 = texture2D(OGL2Texture,pC4-g1).xyz, C1 = texture2D(OGL2Texture,pC4-g3).xyz,
| C2 = texture2D(OGL2Texture,pC4-g2).xyz, C3 = texture2D(OGL2Texture,pC4-g4).xyz,
| C4 = texture2D(OGL2Texture,pC4 ).xyz, C5 = texture2D(OGL2Texture,pC4+g4).xyz,
| C6 = texture2D(OGL2Texture,pC4+g2).xyz, C7 = texture2D(OGL2Texture,pC4+g3).xyz,
| C8 = texture2D(OGL2Texture,pC4+g1).xyz, D0 = texture2D(OGL2Texture,pC4+g2+g3).xyz,
| D1 = texture2D(OGL2Texture,pC4+g1+g2).xyz, D2 = texture2D(OGL2Texture,pC4+g1+g3).xyz,
| D3 = texture2D(OGL2Texture,pC4-g2+g4).xyz, D4 = texture2D(OGL2Texture,pC4+g1-g2).xyz,
| D5 = texture2D(OGL2Texture,pC4+g1+g4).xyz, D6 = texture2D(OGL2Texture,pC4+2.*g1).xyz;
| // Scalar keys for the equality tests below (d0 and d6 are computed but not referenced).
| float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3),
| c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
| c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
| d3 = reduce(D3), d4 = reduce(D4), d5 = reduce(D5), d6 = reduce(D6), r;
| // Case 1: only the C4-C8 diagonal matches.
| gl_FragColor.xyz = (c4 == c8 && c5 != c7)
| ? (fp.x < .5)
| ? (fp.y < .5)
| ? C4
| : (c4 == c1 && c5 == d5 || c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
| ? C4
| : .5*(C4+C5)
| : (fp.y < .5)
| ? (c4 == c1 && c5 == d5 || c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
| ? C4
| : .5*(C4+C5)
| : C4
| // Case 2: only the C5-C7 diagonal matches.
| : (c5 == c7 && c4 != c8)
| ? (fp.x < .5)
| ? (fp.y < .5)
| ? C4
| : (c5 == c2 && c4 == c6 || c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
| ? C5
| : .5*(C4+C5)
| : (fp.y < .5)
| ? (c5 == c2 && c4 == c6 || c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
| ? C5
| : .5*(C4+C5)
| : C5
| // Case 3: both diagonals match.
| : (c4 == c8 && c5 == c7)
| ? (c4 == c5)
| ? C4
| : (fp.x < .5)
| ? (fp.y < .5)
| ? C4
| : .5*(C4+C5)
| : (fp.y < .5)
| ? .5*(C4+C5)
| // Each sign() term is 0 when both listed neighbours equal the colour and 1 otherwise,
| // so r > 0 means C5 has more fully matching neighbour pairs than C4 (and C4 is chosen).
| : ((r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))+sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
| -sign(abs(c5-d4)+abs(c5-c2))-sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2))) == 0.)
| ? .25*(C4+C5+C7+C8)
| : (r > 0.)
| ? C4
| : C5
| // Case 4: neither diagonal matches.
| // Fixed in both quadrants below: the first test ends with c5 == d3, the same pattern
| // test used in case 1 above; it previously read c5 == c3.
| : (fp.x < .5)
| ? (fp.y < .5)
| ? C4
| : (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
| ? C4
| : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
| ? C5
| : .5*(C4+C5)
| : (fp.y < .5)
| ? (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
| ? C4
| : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
| ? C5
| : .5*(C4+C5)
| : .25*(C4+C5+C7+C8);
| | } | |
Enjoy!
PS: your mileage may vary.. (I would not be surprised to find that this code is slower on my 6600GT than your original.. luckily the Go 7700 seems to be a lot more logical when it comes to optimisation)
|
guest  Real addict
  

Status:Offline Date registered: 30.07.2004 Post:856 Send Message | Created on 09.05.2007 - 14:12 |  |
My version reaches avg. 65 FPS with sprite games and up to 200 FPS with other (3D) games (1280x1024).
I think the OGL2 plugin can be altered to handle sprites more efficiently, so i'm optimistic.
[Dieser Beitrag wurde am 17.05.2007 - 21:39 von guest aktualisiert]
|