Pete´s Messageboard... No ISO/BIOS requests!

Homepage Members Register Login Search Old board


Neuer Thread ...
More : [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]


AuthorTopics » Book an abo for this threadClose Thread Move Thread Fix the thread Print view Delete this thread

guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 07.05.2007 - 13:08Jump to top Quote this post Report this post Edit Delete


Texel map:

// C0 C1 C2 D3
// C3 C4 C5 D4
// C6 C7 C8 D5
// D0 D1 D2 D6

The Scale2x algorithm is, as I recall, left-to-right and top-to-bottom symmetric, and the GLSL port uses this feature to apply a simplified algorithm (it replaces some input colors along the way) depending on the fractional position.

It could be done similar with the 2xSaI algorithm. As it can be seen the "texel" lookups are extended on right and bottom sides (C4 is the central "texel" - points to up-left/bottom-right or "y=-x" symmetry axis)

If the algorithm turns out to be symmetric, the same procedure (with different color data) can be used to calculate p01 and p10.


"Product mix" or better expressed "Smooth pattern transitions":

The 2xSaI shader, as it is, branches through the patterns based on color equalities. PSX games use things such as texturing, palettized textures, transparency
... Some texels can turn out to be very alike, but not the same - and the algorithm chooses to ignore these similarities. "Irregular artifacts" are produced this way...

[Dieser Beitrag wurde am 07.05.2007 - 13:47 von guest aktualisiert]




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 07.05.2007 - 14:42Jump to top Quote this post Report this post Edit Delete


Hmm, so the product mix is a quality issue rather than a speed one? We could try declaring colours equal if the difference between them is smaller than, say, 1%.. what are the minimum and maximum values for c0-c8 and d0-d6?

The symmetry 'issue' is an interesting one I may look into later. I'll need a better understanding of the algorithm for it though. (which may take some time)

Finally, here's the most recent version of my fragment file:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
 
// Per-channel weights used by reduce() to fold an RGB triple into one float.
const vec3 dt = vec3(65536.0,256.0,1.0);

// Collapses an RGB colour into a single scalar (dot product with dt) so that
// main() can compare two texels with a single float comparison.
// NOTE(review): presumably the channels are in [0,1]; at that scale a 32-bit
// float may not resolve the smallest blue differences when red is large — confirm.
float reduce(vec3 color){
    return dot(color,dt);
}

uniform vec4 OGL2Size;
uniform vec4 OGL2InvSize;
uniform sampler2D OGL2Texture;

// Fragment entry point of the 2xSaI-style scaler discussed in this thread.
//
// Samples the 4x4 texel neighbourhood around the current texel C4:
//     C0 C1 C2 D3
//     C3 C4 C5 D4
//     C6 C7 C8 D5
//     D0 D1 D2 D6
// reduces every sample to a scalar with reduce(), then picks or blends
// C4/C5/C7/C8 based on exact equality of those scalars and on which quadrant
// of the source texel (fp) this fragment lies in.
// Only gl_FragColor.xyz is written; the alpha component is not assigned here.
void main(){
    // fp: fractional position inside the source texel.
    // dx/dy/g1/g2: steps right, down, down-right and down-left in texture
    // coordinates (presumably one texel each — depends on OGL2InvSize).
    vec2  fp  = fract(gl_TexCoord[0].xy*OGL2Size.xy),
          dx  = vec2(OGL2InvSize.x,0.0           ),
          dy  = vec2(0.0           ,OGL2InvSize.y),
          g1  = vec2( OGL2InvSize.x,OGL2InvSize.y),
          g2  = vec2(-OGL2InvSize.x,OGL2InvSize.y),
          pC4 = gl_TexCoord[0].xy, pC8 = pC4 + g1;
    // Neighbourhood fetches, listed row by row of the texel map above.
    vec3  C0  = texture2D(OGL2Texture,pC4-g1   ).xyz,
          C1  = texture2D(OGL2Texture,pC4   -dy).xyz,
          C2  = texture2D(OGL2Texture,pC4-g2   ).xyz,
          D3  = texture2D(OGL2Texture,pC4-g2+dx).xyz,
          C3  = texture2D(OGL2Texture,pC4   -dx).xyz,
          C4  = texture2D(OGL2Texture,pC4      ).xyz,
          C5  = texture2D(OGL2Texture,pC4   +dx).xyz,
          D4  = texture2D(OGL2Texture,pC8-g2   ).xyz,
          C6  = texture2D(OGL2Texture,pC4+g2   ).xyz,
          C7  = texture2D(OGL2Texture,pC4   +dy).xyz,
          C8  = texture2D(OGL2Texture,pC8      ).xyz,
          D5  = texture2D(OGL2Texture,pC8   +dx).xyz,
          D0  = texture2D(OGL2Texture,pC4+g2+dy).xyz,
          D1  = texture2D(OGL2Texture,pC8+g2   ).xyz,
          D2  = texture2D(OGL2Texture,pC8   +dy).xyz,
          D6  = texture2D(OGL2Texture,pC8+g1   ).xyz;
    // Scalar keys used for all equality tests below (d6 is computed but not
    // referenced by the selection logic).
    float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3),
          c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
          c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
          d3 = reduce(D3), d4 = reduce(D4), d5 = reduce(D5), d6 = reduce(D6);
    // Case 1: the C4-C8 diagonal matches, the C5-C7 diagonal does not.
    if(c4 == c8 && c5 != c7)
        gl_FragColor.xyz = (fp.x < 0.5)
          ? (fp.y < 0.5) ? C4
                         : ((c4 == c3 && c7 == d2) || (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)) ? C4 : 0.5*(C4+C7)
          : (fp.y < 0.5) ? ((c4 == c1 && c5 == d5) || (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)) ? C4 : 0.5*(C4+C5)
                         : C4;
    // Case 2: the C5-C7 diagonal matches, the C4-C8 diagonal does not.
    else if(c5 == c7 && c4 != c8)
        gl_FragColor.xyz = (fp.x < 0.5)
          ? (fp.y < 0.5) ? C4
                         : ((c7 == c6 && c4 == c2) || (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)) ? C7 : 0.5*(C4+C7)
          : (fp.y < 0.5) ? ((c5 == c2 && c4 == c6) || (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)) ? C5 : 0.5*(C4+C5)
                         : C5;
    // Case 3: both diagonals match.
    else if(c4 == c8 && c5 == c7){
        if(c4 == c5) gl_FragColor.xyz = C4;
        else{
            if(fp.x < 0.5){
                if(fp.y < 0.5) gl_FragColor.xyz = C4;
                else gl_FragColor.xyz = 0.5*(C4+C7);
            }else{
                if(fp.y < 0.5) gl_FragColor.xyz = 0.5*(C4+C5);
                else{
                    // Vote between C4 and C5 using the surrounding texel pairs:
                    // each sign() term is 0 when both texels of the pair equal
                    // the candidate and 1 otherwise.
                    float r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))+sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
                             -sign(abs(c5-d4)+abs(c5-c2))-sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2));
                    gl_FragColor.xyz = (r == 0.0) ? 0.25*(C4+C5+C7+C8) : (r > 0.0) ? C4 : C5;
                }
            }
        }
    // Case 4: neither diagonal matches.
    }else{
        if(fp.x < 0.5){
            if(fp.y < 0.5) gl_FragColor.xyz = C4;
            else gl_FragColor.xyz = (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
        }else{
            // The first test compares C5 against D3 (the texel right of C2),
            // the same pattern as in case 1 and the mirror image of the d0
            // test used for the lower-left quadrant just above.
            if(fp.y < 0.5) gl_FragColor.xyz = (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3) ? C4 : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0) ? C5 : 0.5*(C4+C5);
            else gl_FragColor.xyz = 0.25*(C4+C5+C7+C8);
        }
    }
}


It's a bit faster again (for me anyway, all this seems very driver- and card-dependent), and I fixed something that may affect quality.




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 07.05.2007 - 15:53Jump to top Quote this post Report this post Edit Delete


Yes, smooth pattern transitions are a quality issue.

A classic approach to re-coding a shader would be to sum the color differences, search for the minimal and maximal "difference", "weight" the product candidates for a single product (for example p01) and blend/mix them accordingly.

Adding a "threshold value" for "equality" is tricky, since the algorithm should stay non-contradictory and uniform. With a high threshold value some branches would become more dominant, etc...

But if so, the color differences should be calculated instead and the if statements rearranged, like "if (dif(C4,C8) <= threshold)..." instead of "if (c4==c8)..."

[Dieser Beitrag wurde am 07.05.2007 - 16:12 von guest aktualisiert]




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 08.05.2007 - 01:19Jump to top Quote this post Report this post Edit Delete


I've been studying the documentation of the shader language this evening, and I'm getting a better grasp of it.

Here's my current fragment file, just some minor syntactic tweaks and some optimisation in getting the values (for a tiny speedup):

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
 
// Per-channel weights used by reduce() to fold an RGB triple into one float.
const vec3 dt = vec3(65536.,256.,1.);
// Texture coordinate of the current texel (C4), shared by set() and main().
// It is assigned at the top of main(): gl_TexCoord is a varying, not a
// constant expression, so it cannot initialise a global `const`.
vec2 pC4;
uniform vec4 OGL2Size;
uniform vec4 OGL2InvSize;
uniform sampler2D OGL2Texture;

// Fetches the RGB value of the texel at the given offset from pC4.
// Must only be called after main() has assigned pC4.
vec3 set(vec2 coord){
    return texture2D(OGL2Texture,pC4+coord).xyz;
}

// Collapses an RGB colour into a single scalar so texels can be compared
// with one float comparison.
float reduce(vec3 color){
    return dot(color,dt);
}

// Fragment entry point of the 2xSaI-style scaler discussed in this thread.
//
// Samples the 4x4 texel neighbourhood around the current texel C4:
//     C0 C1 C2 D3
//     C3 C4 C5 D4
//     C6 C7 C8 D5
//     D0 D1 D2 D6
// and picks or blends C4/C5/C7/C8 depending on exact equality of the reduced
// colours and on the quadrant of the source texel (fp) the fragment is in.
// Only gl_FragColor.xyz is written; the alpha component is not assigned here.
void main(){
    pC4 = gl_TexCoord[0].xy;
    // dx/dy/g1/g2: steps right, down, down-right and down-left in texture
    // coordinates; fp: fractional position inside the source texel.
    vec2  dx = vec2( OGL2InvSize.x,0.           ),
          dy = vec2( 0.           ,OGL2InvSize.y),
          g1 = vec2( OGL2InvSize.x,OGL2InvSize.y),
          g2 = vec2(-OGL2InvSize.x,OGL2InvSize.y),
          fp = fract(pC4*OGL2Size.xy);
    vec3  C0 = set(-g1),      C1 = set(-dy),   C2 = set(-g2),   C3 = set(-dx),
          C4 = set(vec2(0.)), C5 = set(dx),    C6 = set(g2),    C7 = set(dy),
          C8 = set(g1),       D0 = set(g2+dy), D1 = set(g1+g2), D2 = set(g1+dy),
          D3 = set(-g2+dx),   D4 = set(g1-g2), D5 = set(g1+dx), D6 = set(g1+g1);
    // Scalar keys used for all equality tests below (d6 is computed but not
    // referenced by the selection logic).
    float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3),
          c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
          c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
          d3 = reduce(D3), d4 = reduce(D4), d5 = reduce(D5), d6 = reduce(D6);
    // Case 1: the C4-C8 diagonal matches, the C5-C7 diagonal does not.
    if(c4 == c8 && c5 != c7)
        gl_FragColor.xyz = (fp.x < 0.5)
          ? (fp.y < 0.5) ? C4
                         : ((c4 == c3 && c7 == d2) || (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)) ? C4 : 0.5*(C4+C7)
          : (fp.y < 0.5) ? ((c4 == c1 && c5 == d5) || (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)) ? C4 : 0.5*(C4+C5)
                         : C4;
    // Case 2: the C5-C7 diagonal matches, the C4-C8 diagonal does not.
    else if(c5 == c7 && c4 != c8)
        gl_FragColor.xyz = (fp.x < 0.5)
          ? (fp.y < 0.5) ? C4
                         : ((c7 == c6 && c4 == c2) || (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)) ? C7 : 0.5*(C4+C7)
          : (fp.y < 0.5) ? ((c5 == c2 && c4 == c6) || (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)) ? C5 : 0.5*(C4+C5)
                         : C5;
    // Case 3: both diagonals match.
    else if(c4 == c8 && c5 == c7){
        if(c4 == c5) gl_FragColor.xyz = C4;
        else{
            if(fp.x < 0.5){
                if(fp.y < 0.5) gl_FragColor.xyz = C4;
                else gl_FragColor.xyz = 0.5*(C4+C7);
            }else{
                if(fp.y < 0.5) gl_FragColor.xyz = 0.5*(C4+C5);
                else{
                    // Vote between C4 and C5 using the surrounding texel pairs:
                    // each sign() term is 0 when both texels of the pair equal
                    // the candidate and 1 otherwise.
                    float r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))+sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
                             -sign(abs(c5-d4)+abs(c5-c2))-sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2));
                    gl_FragColor.xyz = (r == 0.) ? 0.25*(C4+C5+C7+C8) : (r > 0.) ? C4 : C5;
                }
            }
        }
    // Case 4: neither diagonal matches.
    }else{
        if(fp.x < 0.5){
            if(fp.y < 0.5) gl_FragColor.xyz = C4;
            else gl_FragColor.xyz = (c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : (c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
        }else{
            // The first test compares C5 against D3 (the texel right of C2),
            // the same pattern as in case 1 and the mirror image of the d0
            // test used for the lower-left quadrant just above.
            if(fp.y < 0.5) gl_FragColor.xyz = (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3) ? C4 : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0) ? C5 : 0.5*(C4+C5);
            else gl_FragColor.xyz = 0.25*(C4+C5+C7+C8);
        }
    }
}


I hope I'll be able to do some more major things once I get further through the document.

Edit: hah, this version is actually slower on my desktop's 6600GT.. well, this is getting silly, I won't be optimising for it anymore, but rather for my laptop's Go 7700 (which should better reflect modern cards and can get much better speed). Currently getting between 44 and 45 fps on it, hope I can get it up to 60 somehow!

[Dieser Beitrag wurde am 08.05.2007 - 01:31 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 08.05.2007 - 20:29Jump to top Quote this post Report this post Edit Delete


Hey Ver!

I exploited the symmetry feat.
Seems to run bit faster...

Regards, guest.r




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 08.05.2007 - 21:07Jump to top Quote this post Report this post Edit Delete


Cool! From what I'd seen of the filter I wasn't sure there was one, so I'm eager to see what you changed. Off to test it (and mess with it) now!

Edit: a quick question. Do you know if the latest ATI drivers support 'conditional returns'?

Code:
1:
 
// Conditional early return: leave the function when both fp components are below one half.
if(fp.x < .5 && fp.y < .5) return;


[Dieser Beitrag wurde am 09.05.2007 - 00:06 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 09.05.2007 - 09:17Jump to top Quote this post Report this post Edit Delete


A more appropriate question is whether conditional returns are included in GLSL.
I'm still reading the PDF I downloaded in 2004.

Once i stated a capable polynomial series engine is everything a capable coder needs.

...

Everything else is basically non-base stuff.

[Dieser Beitrag wurde am 09.05.2007 - 13:29 von guest aktualisiert]




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 09.05.2007 - 09:24Jump to top Quote this post Report this post Edit Delete


I've seen nothing to indicate that they're included, but who would use an unconditional return in a void function? Anyway, I've found that while the nvidia ForceWare 94.20 drivers don't support them, 158.22 and 165.01 (which are still in beta) -do-. The ATI driver scene is a lot simpler to check though.




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 09.05.2007 - 13:52Jump to top Quote this post Report this post Edit Delete




It's by no means constant (it'll go as low *cough* as 58fps) but the magic number has been reached!

vertex file:

Code:
1:
2:
3:
4:
 
// Pass-through vertex stage: forwards texture coordinate set 0 unchanged and
// positions the vertex with the fixed-function transform.
void main()
{
    gl_TexCoord[0] = gl_MultiTexCoord0;
    gl_Position    = ftransform();
}
fragment file:
Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
 
// Per-channel weights used by reduce() to fold an RGB triple into one float.
const vec3 dt = vec3(65536.,256.,1.);
// Set by the host application; presumably the texture size and its
// reciprocal — confirm against the plugin that loads this shader.
uniform vec4 OGL2Size, OGL2InvSize;
// Source image sampled by main().
uniform sampler2D OGL2Texture;

// Collapses an RGB colour into a single scalar (dot product with dt) so that
// main() can compare two texels with a single float comparison.
float reduce(vec3 color){return dot(color,dt);}

// Fragment entry point of the symmetry-exploiting 2xSaI-style scaler.
//
// Samples the 4x4 texel neighbourhood around the current texel C4:
//     C0 C1 C2 D3
//     C3 C4 C5 D4
//     C6 C7 C8 D5
//     D0 D1 D2 D6
// For fragments in the lower-left quadrant of the texel the neighbourhood is
// fetched transposed (x and y steps swapped), so the formulas written for the
// "right neighbour" C5 also produce the lower-left result.
// Only gl_FragColor.xyz is written; the alpha component is not assigned here.
void main(){
    // pC4: coordinate of the current texel; fp: fractional position inside it.
    // dx/dy are kept from the earlier versions; the fetches below use g1..g4.
    vec2 pC4 = gl_TexCoord[0].xy, fp = fract(pC4*OGL2Size.xy),
         dx = vec2( OGL2InvSize.x,0.           ), dy = vec2( 0.           ,OGL2InvSize.y),
         g1 = vec2( OGL2InvSize.x,OGL2InvSize.y), g2 = vec2(-OGL2InvSize.x,OGL2InvSize.y);
    // Negating g2 in the lower-left quadrant swaps the axis steps derived below.
    if(fp.x < .5 && fp.y >= .5) g2*=-1.;
    // Normally g3 is the down step and g4 the right step; after the negation
    // above g3 is the right step and g4 the down step.
    vec2 g3 = .5*(g1+g2), g4 = .5*(g1-g2);
    // D2 sits one step diagonally plus one step along g3 from C4, i.e. directly
    // "below" C8 in the (possibly transposed) texel map. Offsetting by 1.5*g1
    // instead lands on the D6 texel wherever d2 is actually read (fp >= .5 on
    // both axes).
    vec3 C0 = texture2D(OGL2Texture,pC4-g1).xyz, C1 = texture2D(OGL2Texture,pC4-g3).xyz,
         C2 = texture2D(OGL2Texture,pC4-g2).xyz, C3 = texture2D(OGL2Texture,pC4-g4).xyz,
         C4 = texture2D(OGL2Texture,pC4   ).xyz, C5 = texture2D(OGL2Texture,pC4+g4).xyz,
         C6 = texture2D(OGL2Texture,pC4+g2).xyz, C7 = texture2D(OGL2Texture,pC4+g3).xyz,
         C8 = texture2D(OGL2Texture,pC4+g1).xyz, D0 = texture2D(OGL2Texture,pC4+g2+g3).xyz,
         D1 = texture2D(OGL2Texture,pC4+g1+g2).xyz, D2 = texture2D(OGL2Texture,pC4+g1+g3).xyz,
         D3 = texture2D(OGL2Texture,pC4-g2+g4).xyz, D4 = texture2D(OGL2Texture,pC4+g1-g2).xyz,
         D5 = texture2D(OGL2Texture,pC4+g1+g4).xyz, D6 = texture2D(OGL2Texture,pC4+2.*g1).xyz;
    // Scalar keys used for all equality tests below; r holds the vote result
    // computed inside the expression (d0 and d6 are computed but not
    // referenced by the selection logic).
    float c0 = reduce(C0), c1 = reduce(C1), c2 = reduce(C2), c3 = reduce(C3),
          c4 = reduce(C4), c5 = reduce(C5), c6 = reduce(C6), c7 = reduce(C7),
          c8 = reduce(C8), d0 = reduce(D0), d1 = reduce(D1), d2 = reduce(D2),
          d3 = reduce(D3), d4 = reduce(D4), d5 = reduce(D5), d6 = reduce(D6), r;
    // Case order: C4-C8 diagonal only, C5-C7 diagonal only, both, neither.
    gl_FragColor.xyz = (c4 == c8 && c5 != c7)
        ? (fp.x < .5)
            ? (fp.y < .5)
                ? C4
                : (c4 == c1 && c5 == d5 || c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
                    ? C4
                    : .5*(C4+C5)
            : (fp.y < .5)
                ? (c4 == c1 && c5 == d5 || c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
                    ? C4
                    : .5*(C4+C5)
                : C4
        : (c5 == c7 && c4 != c8)
            ? (fp.x < .5)
                ? (fp.y < .5)
                    ? C4
                    : (c5 == c2 && c4 == c6 || c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
                        ? C5
                        : .5*(C4+C5)
                : (fp.y < .5)
                    ? (c5 == c2 && c4 == c6 || c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
                        ? C5
                        : .5*(C4+C5)
                    : C5
            : (c4 == c8 && c5 == c7)
                ? (c4 == c5)
                    ? C4
                    : (fp.x < .5)
                        ? (fp.y < .5)
                            ? C4
                            : .5*(C4+C5)
                        : (fp.y < .5)
                            ? .5*(C4+C5)
                            : ((r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))+sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
                                   -sign(abs(c5-d4)+abs(c5-c2))-sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2))) == 0.)
                                ? .25*(C4+C5+C7+C8)
                                : (r > 0.)
                                    ? C4
                                    : C5
                // Neither diagonal matches. The first test compares C5 against
                // D3, the same pattern as in the first case above.
                : (fp.x < .5)
                    ? (fp.y < .5)
                        ? C4
                        : (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
                            ? C4
                            : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
                                ? C5
                                : .5*(C4+C5)
                    : (fp.y < .5)
                        ? (c4 == c7 && c4 == c2 && c5 != c1 && c5 == d3)
                            ? C4
                            : (c5 == c1 && c5 == c8 && c4 != c2 && c4 == c0)
                                ? C5
                                : .5*(C4+C5)
                        : .25*(C4+C5+C7+C8);
}


Enjoy!

PS: your mileage may vary.. (I would not be surprised to find that this code is slower on my 6600GT than your original.. luckily the Go 7700 seems to be a lot more logical when it comes to optimisation)




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 09.05.2007 - 14:12Jump to top Quote this post Report this post Edit Delete


My version reaches avg. 65 FPS with sprite games and up to 200 FPS with other (3D) games (1280x1024).

I think the OGL2 plugin can be altered to handle sprites more efficiently, so i'm optimistic.

[Dieser Beitrag wurde am 17.05.2007 - 21:39 von guest aktualisiert]




More : [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]

Similarly threads:
Topics Created by Replies Boardname
Cartoon GLSL shader guest 7 pete_bernert
Is there a OGL2 2xSaI Shader out there? GreenImp 0 pete_bernert
Scale2x Plus GLSL shader guest 0 pete_bernert
SuperEagle GLSL shader guest 0 pete_bernert
Super2xSaI GLSL shader guest 0 pete_bernert
Neuer Thread ...





Masthead

This forum is a free service of razyboard.com powered by:
Geizkragen Price Comparison. Top product in the price comparison: Krups Nespresso Essenza (XN2001)
Do you want a free forum in less than two minutes? Then click here!



Verwandte Suchbegriffe:
2xsai documentation | mess 2xsai | threshold glsl shader
blank