Pete´s Messageboard... No ISO/BIOS requests!

Homepage Members Register Login Search Old board


Neuer Thread ...
More : [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]


AuthorTopics » Book an abo for this threadClose Thread Move Thread Fix the thread Print view Delete this thread

VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 09.05.2007 - 16:25Jump to top Quote this post Report this post Edit Delete


Damn, I knew it was too good to be true.. looks like I messed something up again, because some pixels have the wrong colour. Fixing this will definitely hurt performance..




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 09.05.2007 - 16:38Jump to top Quote this post Report this post Edit Delete


One optimisation opens up another...

Generally speaking, the port is now about 2x faster than it was initially (i.e. from 35 to 70 avg. FPS on an X1650 Pro at 1280x1024 - for sprite games like Lunar).

Ver: i figured out some colors can be ignored (D3, D6) in my later versions. Sure u can do the same.




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 09.05.2007 - 16:56Jump to top Quote this post Report this post Edit Delete


Yep, I noticed ^_^

Might I suggest this code for assigning the values? It's somewhat simpler and a bit faster on both my cards:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
 
    vec2 OGL2Pos = gl_TexCoord[0].xy*OGL2Size.xy,
         fp  = fract(OGL2Pos),
         dx  = vec2( OGL2InvSize.x,          0.0),
         dy  = vec2( 0.0,          OGL2InvSize.y),
         g1  = vec2( OGL2InvSize.x,OGL2InvSize.y),
         g2  = vec2(-OGL2InvSize.x,OGL2InvSize.y);
    if(fp.x >= .5 && fp.y < .5) g2*=-1.0;
    vec2 g3 = .5*(g1+g2), g4 = .5*(g1-g2),
         pC4 = floor(OGL2Pos)/OGL2Size.xy;
    vec3 C0 = texture2D(OGL2Texture,pC4-g1   ).xyz,
         C1 = texture2D(OGL2Texture,pC4-g3   ).xyz,
         C2 = texture2D(OGL2Texture,pC4-g2   ).xyz,
         C3 = texture2D(OGL2Texture,pC4-g4   ).xyz,
         C4 = texture2D(OGL2Texture,pC4      ).xyz,
         C5 = texture2D(OGL2Texture,pC4+g4   ).xyz,
         C6 = texture2D(OGL2Texture,pC4+g2   ).xyz,
         C7 = texture2D(OGL2Texture,pC4+g3   ).xyz,
         C8 = texture2D(OGL2Texture,pC4+g1   ).xyz,
         D0 = texture2D(OGL2Texture,pC4+g2+g3).xyz,
         D1 = texture2D(OGL2Texture,pC4+g1+g2).xyz,
         D2 = texture2D(OGL2Texture,pC4+g1+g3).xyz,
         D4 = texture2D(OGL2Texture,pC4+g1-g2).xyz,
         D5 = texture2D(OGL2Texture,pC4+g1+g4).xyz,
         p10,p11;


[Dieser Beitrag wurde am 09.05.2007 - 17:08 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 09.05.2007 - 18:23Jump to top Quote this post Report this post Edit Delete


OK., i figured how it works faster too...

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
 

 /*
  2xSaI GLSL shader 

           - Copyright (C) 2007 guest(r) - guest.r@gmail.com

           - License: GNU-GPL  


   The 2xSaI algorithm

           - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.

*/


// Per-channel weights used by reduce(): dot(color,dt) folds an RGB triple
// into a single float so two colours can be compared with one scalar test.
const vec3 dt = vec3(65536.0,256.0,1.0);



// Compares how A and B relate to the pair (C, D).
// Each term is 0.0 when the value equals both C and D, and 1.0 otherwise;
// the result is the A term minus the B term, i.e. -1.0, 0.0 or +1.0.
float GET_RESULT(float A, float B, float C, float D)
{
    float aDiffers = sign(abs(A-C)+abs(A-D));
    float bDiffers = sign(abs(B-C)+abs(B-D));
    return aDiffers - bDiffers;
}


// Folds an RGB colour into a single float key (dot product with the dt
// weights) so that colours can be compared with one scalar comparison.
// The opening brace of the body was missing, which made the shader fail
// to compile.
float reduce(vec3 color)
{
    return dot(color,dt);
}


// Inputs read by main(). Nothing in this shader assigns them; presumably the
// host application supplies them - confirm against the caller.
uniform vec4 OGL2Size;          // .xy scales gl_TexCoord[0] into texel units
uniform vec4 OGL2InvSize;       // .xy is used as the one-texel sampling step
uniform sampler2D OGL2Texture;  // image sampled by every texture2D() call

// Fragment entry point of the 2xSaI filter.
// Samples 14 texels around the current source texel, compares them through
// their reduce() keys and writes the centre texel (C4) or one of the two
// products p10 / p11, depending on which quarter of the texel the fragment
// falls in.
// NOTE(review): only gl_FragColor.xyz is written; .w is never assigned here.
void main()
{

    // Calculating texel coordinates

    vec2 texPos = gl_TexCoord[0].xy*OGL2Size.xy;   // position in texel units
    vec2 quad   = fract(texPos);                   // position inside the texel
    vec2 diag   = vec2( OGL2InvSize.x,OGL2InvSize.y);
    vec2 anti   = vec2(-OGL2InvSize.x,OGL2InvSize.y);

    // The second step direction is negated when quad.x >= 0.5 and quad.y < 0.5.
    if (quad.x >= 0.5 && quad.y < 0.5) {
        anti = -anti;
    }

    vec2 pC4 = floor(texPos)/OGL2Size.xy;
    vec2 pC0 = pC4 - diag;
    vec2 pC8 = pC4 + diag;
    vec2 pC3 = (pC4 - 0.5*diag) + 0.5*anti;
    vec2 pC1 = pC3 - anti;
    vec2 pC5 = pC1 + diag;
    vec2 pC7 = pC3 + diag;


    // Reading the texels

    vec3 C0 = texture2D(OGL2Texture,pC0     ).xyz;
    vec3 C1 = texture2D(OGL2Texture,pC1     ).xyz;
    vec3 C2 = texture2D(OGL2Texture,pC4-anti).xyz;
    vec3 C3 = texture2D(OGL2Texture,pC3     ).xyz;
    vec3 C4 = texture2D(OGL2Texture,pC4     ).xyz;
    vec3 C5 = texture2D(OGL2Texture,pC5     ).xyz;
    vec3 C6 = texture2D(OGL2Texture,pC4+anti).xyz;
    vec3 C7 = texture2D(OGL2Texture,pC7     ).xyz;
    vec3 C8 = texture2D(OGL2Texture,pC8     ).xyz;
    vec3 D0 = texture2D(OGL2Texture,pC7+anti).xyz;
    vec3 D1 = texture2D(OGL2Texture,pC8+anti).xyz;
    vec3 D2 = texture2D(OGL2Texture,pC7+diag).xyz;
    vec3 D4 = texture2D(OGL2Texture,pC8-anti).xyz;
    vec3 D5 = texture2D(OGL2Texture,pC5+diag).xyz;

    // Scalar comparison keys, one per sampled colour.
    float c0 = reduce(C0);
    float c1 = reduce(C1);
    float c2 = reduce(C2);
    float c3 = reduce(C3);
    float c4 = reduce(C4);
    float c5 = reduce(C5);
    float c6 = reduce(C6);
    float c7 = reduce(C7);
    float c8 = reduce(C8);
    float d0 = reduce(D0);
    float d1 = reduce(D1);
    float d2 = reduce(D2);
    float d4 = reduce(D4);
    float d5 = reduce(D5);


    /*              SaI code               */
    /*  Copied from the Dosbox source code        */
    /*  Copyright (C) 2002-2007  The DOSBox Team  */
    /*  License: GNU-GPL                          */
    /*  Adapted by guest(r) on 20.4 and 9.5. 2007 */

    // Values and tests that several branches share.
    vec3 avg47  = 0.5*(C4+C7);
    vec3 avgAll = 0.25*(C4+C5+C7+C8);

    bool sameDiag = (c4 == c8);
    bool sameAnti = (c5 == c7);
    bool ruleC4   = (c4 == c5)&&(c4 == c6)&&(c3 != c7)&&(c7 == d0);
    bool ruleC7   = (c7 == c3)&&(c7 == c8)&&(c4 != c6)&&(c4 == c0);

    vec3 p10;
    vec3 p11;

    if (sameDiag && !sameAnti) {
        // C4 == C8 only.
        if (((c4 == c3)&&(c7 == d2)) || ruleC4) {
            p10 = C4;
        } else {
            p10 = avg47;
        }
        p11 = C4;
    } else if (sameDiag && c4 == c5) {
        // Both pairs match and C4 == C5: flat area.
        p10 = C4;
        p11 = C4;
    } else if (sameDiag) {
        // Both pairs match but C4 != C5: vote with the neighbouring pairs.
        float r = GET_RESULT(c4,c5,c3,c1)
                - GET_RESULT(c5,c4,d4,c2)
                - GET_RESULT(c5,c4,c6,d1)
                + GET_RESULT(c4,c5,d5,d2);
        if (r > 0.0) {
            p11 = C4;
        } else if (r < 0.0) {
            p11 = C5;
        } else {
            p11 = avgAll;
        }
        p10 = avg47;
    } else if (sameAnti) {
        // C5 == C7 only.
        if (((c7 == c6)&&(c4 == c2)) || ruleC7) {
            p10 = C7;
        } else {
            p10 = avg47;
        }
        p11 = C5;
    } else {
        // Neither pair matches.
        p11 = avgAll;
        if (ruleC4) {
            p10 = C4;
        } else if (ruleC7) {
            p10 = C7;
        } else {
            p10 = avg47;
        }
    }

    // Distributing the final products

    vec3 outColor = p10;
    if (quad.x >= 0.5 && quad.y >= 0.5) {
        outColor = p11;
    } else if (quad.x < 0.5 && quad.y < 0.5) {
        outColor = C4;
    }
    gl_FragColor.xyz = outColor;
}


Vertex file stays the same...

[Dieser Beitrag wurde am 10.05.2007 - 13:11 von guest aktualisiert]




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 09.05.2007 - 21:57Jump to top Quote this post Report this post Edit Delete


Wow, that was some optimisation, I get a constant 60fps now! *goes to see what you changed*

Edit: well it took me some time to figure it out with the somewhat messed up spacing, but I see what you did now; nice work!

[Dieser Beitrag wurde am 09.05.2007 - 22:20 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 10.05.2007 - 13:16Jump to top Quote this post Report this post Edit Delete


It looks like the SW 2xSaI code has still some speedup potential (SuperEagle is optimised already).
I'll upload the shader again since the branching levels were really too well disguised.

I almost forgot...

When i first used the code to calculate p01 as the "major candidate", some colors looked different.
Dunno if it's an older, newer bug, scaler feature...

When i turned things around (and used the p10 code) i didn't notice any differences between full and adapted algorithms.




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 10.05.2007 - 14:23Jump to top Quote this post Report this post Edit Delete


Well, barring further optimisations, here's my final code (vertex file unchanged):

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
 
/*
  2xSaI GLSL shader

           - Copyright (C) 2007 guest(r) - guest.r@gmail.com

           - License: GNU-GPL

  The 2xSaI algorithm

           - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
*/

// reduce(color): folds an RGB colour into one float key via dot(color,dt)
// so that colours can be compared with a single scalar test.
#define reduce(color)(dot(color,dt))

// Per-channel weights used by reduce().
const vec3 dt = vec3(65536.,256.,1.);
// Read by main(): OGL2Size.xy scales gl_TexCoord[0] into texel units and
// OGL2InvSize.xy is used as the one-texel sampling step. Presumably set by
// the host application - not visible in this shader.
uniform vec4 OGL2Size, OGL2InvSize;
// Image sampled by every texture2D() call in main().
uniform sampler2D OGL2Texture;

// Fragment entry point of the 2xSaI filter.
// Samples 14 texels around the current source texel, compares them through
// their reduce() keys and writes the centre texel (C4) or one of the two
// products p10 / p11, depending on which quarter of the texel the fragment
// falls in.
// NOTE(review): only gl_FragColor.xyz is written; .w is never assigned here.
void main()
{

    // Calculating texel coordinates

    vec2 texPos = gl_TexCoord[0].xy*OGL2Size.xy;   // position in texel units
    vec2 quad   = fract(texPos);                   // position inside the texel
    vec2 diag   = vec2( OGL2InvSize.x,OGL2InvSize.y);
    vec2 anti   = vec2(-OGL2InvSize.x,OGL2InvSize.y);

    // The second step direction is negated when quad.x >= .5 and quad.y < .5.
    if(quad.x >= .5 && quad.y < .5)
    {
        anti = -anti;
    }

    vec2 pC4  = floor(texPos)/OGL2Size.xy;
    vec2 side = .5*(diag-anti);
    vec2 pC8  = pC4+diag;


    // Reading the texels

    vec3 C0 = texture2D(OGL2Texture,pC4-diag        ).xyz;
    vec3 C1 = texture2D(OGL2Texture,pC4-anti-side   ).xyz;
    vec3 C2 = texture2D(OGL2Texture,pC4-anti        ).xyz;
    vec3 C3 = texture2D(OGL2Texture,pC4-side        ).xyz;
    vec3 C4 = texture2D(OGL2Texture,pC4             ).xyz;
    vec3 C5 = texture2D(OGL2Texture,pC4+side        ).xyz;
    vec3 C6 = texture2D(OGL2Texture,pC4+anti        ).xyz;
    vec3 C7 = texture2D(OGL2Texture,pC8-side        ).xyz;
    vec3 C8 = texture2D(OGL2Texture,pC8             ).xyz;
    vec3 D0 = texture2D(OGL2Texture,pC4+2.*anti+side).xyz;
    vec3 D1 = texture2D(OGL2Texture,pC8+anti        ).xyz;
    vec3 D2 = texture2D(OGL2Texture,pC8+diag-side   ).xyz;
    vec3 D4 = texture2D(OGL2Texture,pC8-anti        ).xyz;
    vec3 D5 = texture2D(OGL2Texture,pC8+side        ).xyz;
    vec3 p10;
    vec3 p11;

    // Scalar comparison keys, one per sampled colour.
    float c0 = reduce(C0);
    float c1 = reduce(C1);
    float c2 = reduce(C2);
    float c3 = reduce(C3);
    float c4 = reduce(C4);
    float c5 = reduce(C5);
    float c6 = reduce(C6);
    float c7 = reduce(C7);
    float c8 = reduce(C8);
    float d0 = reduce(D0);
    float d1 = reduce(D1);
    float d2 = reduce(D2);
    float d4 = reduce(D4);
    float d5 = reduce(D5);


    /*              SaI code               */
    /*  Copied from the Dosbox source code        */
    /*  Copyright (C) 2002-2007  The DOSBox Team  */
    /*  License: GNU-GPL                          */
    /*  Adapted by guest(r) on 20.4 and 9.5. 2007 */

    if(c4 == c8)
    {
        if(c5 != c7)
        {
            if(c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
            {
                p10 = C4;
            }
            else
            {
                p10 = .5*(C4+C7);
            }
            p11 = C4;
        }
        else if(c4 == c5)
        {
            // Both pairs match and C4 == C5: flat area.
            p10 = C4;
            p11 = C4;
        }
        else
        {
            // Each sign() term is 0 when the key equals both listed
            // neighbours and 1 otherwise; c4 terms add, c5 terms subtract.
            float r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))
                     +sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
                     -sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-d4)+abs(c5-c2))
                     -sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2));
            p10 = .5*(C4+C7);
            if(r > 0.)
            {
                p11 = C4;
            }
            else if(r < 0.)
            {
                p11 = C5;
            }
            else
            {
                p11 = .25*(C4+C5+C7+C8);
            }
        }
    }
    else if(c5 == c7)
    {
        if(c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
        {
            p10 = C7;
        }
        else
        {
            p10 = 0.5*(C4+C7);
        }
        p11 = C5;
    }
    else
    {
        p11 = 0.25*(C4+C5+C7+C8);
        if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
        {
            p10 = C4;
        }
        else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
        {
            p10 = C7;
        }
        else
        {
            p10 = 0.5*(C4+C7);
        }
    }

    // Distributing the final products

    vec3 outColor = C4;
    if(quad.x >= .5 && quad.y >= .5)
    {
        outColor = p11;
    }
    else if(quad.x >= .5 || quad.y >= .5)
    {
        outColor = p10;
    }
    gl_FragColor.xyz = outColor;
}


Hope you agree with the coding conventions I used.. this should be 'by the book'. This one gives me a constant ~61fps on my laptop, with vsync + triple buffering disabled (which makes me lose about 5fps). Tested in Chrono Trigger.

Edit: actually, I take that back.. this is all so confusing. Now I'm getting more fps without said combination... Oh well, it's at 60 atleast.

[Dieser Beitrag wurde am 10.05.2007 - 14:27 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 10.05.2007 - 20:03Jump to top Quote this post Report this post Edit Delete


A version based on modified diag. color "equalities".
Branching through larger code segments more often results in a moderate speed reduction.

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
 

/*
  2xSaI GLSL shader

           - Copyright (C) 2007 guest(r) - guest.r@gmail.com

           - License: GNU-GPL

           - Enhanced by VerGreeneyes (10.5.2007)

           - (Experimental version - modified color "equality")
           


  The 2xSaI algorithm

           - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
*/

// reduce(color): folds an RGB colour into one float key via dot(color,dt)
// so that colours can be compared with a single scalar test.
#define reduce(color)(dot(color,dt))

// Threshold that main() compares the summed per-channel differences against.
// NOTE(review): 2 * 1/2^5 is 0.0625, but the literal below is 0.064 - confirm
// which value is intended.
const float th = 0.064; // 2 * 1/2^5 tolerance (2 singletons in RGB555)
 
// Per-channel weights used by reduce().
const vec3  dt = vec3(65536.,256.,1.);
// All-ones vector: dot(abs(a-b),dp) sums the absolute channel differences.
const vec3  dp = vec3(1.0);
// Read by main(): OGL2Size.xy scales gl_TexCoord[0] into texel units and
// OGL2InvSize.xy is used as the one-texel sampling step. Presumably set by
// the host application - not visible in this shader.
uniform vec4 OGL2Size, OGL2InvSize;
// Image sampled by every texture2D() call in main().
uniform sampler2D OGL2Texture;

// Fragment entry point of the experimental 2xSaI variant.
// Same structure as the exact-match filter, except that the two top-level
// pair tests (C4 vs C8, C5 vs C7) use a summed channel distance compared
// against th instead of key equality. All inner tests still compare keys.
// NOTE(review): only gl_FragColor.xyz is written; .w is never assigned here.
void main()
{

    // Calculating texel coordinates

    vec2 texPos = gl_TexCoord[0].xy*OGL2Size.xy;   // position in texel units
    vec2 quad   = fract(texPos);                   // position inside the texel
    vec2 diag   = vec2( OGL2InvSize.x,OGL2InvSize.y);
    vec2 anti   = vec2(-OGL2InvSize.x,OGL2InvSize.y);

    // The second step direction is negated when quad.x >= .5 and quad.y < .5.
    if(quad.x >= .5 && quad.y < .5)
    {
        anti = -anti;
    }

    vec2 pC4  = floor(texPos)/OGL2Size.xy;
    vec2 side = .5*(diag-anti);
    vec2 pC8  = pC4+diag;


    // Reading the texels

    vec3 C0 = texture2D(OGL2Texture,pC4-diag        ).xyz;
    vec3 C1 = texture2D(OGL2Texture,pC4-anti-side   ).xyz;
    vec3 C2 = texture2D(OGL2Texture,pC4-anti        ).xyz;
    vec3 C3 = texture2D(OGL2Texture,pC4-side        ).xyz;
    vec3 C4 = texture2D(OGL2Texture,pC4             ).xyz;
    vec3 C5 = texture2D(OGL2Texture,pC4+side        ).xyz;
    vec3 C6 = texture2D(OGL2Texture,pC4+anti        ).xyz;
    vec3 C7 = texture2D(OGL2Texture,pC8-side        ).xyz;
    vec3 C8 = texture2D(OGL2Texture,pC8             ).xyz;
    vec3 D0 = texture2D(OGL2Texture,pC4+2.*anti+side).xyz;
    vec3 D1 = texture2D(OGL2Texture,pC8+anti        ).xyz;
    vec3 D2 = texture2D(OGL2Texture,pC8+diag-side   ).xyz;
    vec3 D4 = texture2D(OGL2Texture,pC8-anti        ).xyz;
    vec3 D5 = texture2D(OGL2Texture,pC8+side        ).xyz;
    vec3 p10;
    vec3 p11;

    // Scalar comparison keys, one per sampled colour.
    float c0 = reduce(C0);
    float c1 = reduce(C1);
    float c2 = reduce(C2);
    float c3 = reduce(C3);
    float c4 = reduce(C4);
    float c5 = reduce(C5);
    float c6 = reduce(C6);
    float c7 = reduce(C7);
    float c8 = reduce(C8);
    float d0 = reduce(D0);
    float d1 = reduce(D1);
    float d2 = reduce(D2);
    float d4 = reduce(D4);
    float d5 = reduce(D5);


    /*              SaI code               */
    /*  Copied from the Dosbox source code        */
    /*  Copyright (C) 2002-2007  The DOSBox Team  */
    /*  License: GNU-GPL                          */
    /*  Adapted by guest(r) on 20.4 and 9.5. 2007 */
    /*  and VerGreeneyes (10.5.2007)              */

    // Summed absolute channel differences of the two pairs.
    float distDiag = dot(abs(C4-C8),dp);
    float distAnti = dot(abs(C5-C7),dp);

    if(distDiag <= th)
    {
        if(distAnti > th)
        {
            if(c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
            {
                p10 = C4;
            }
            else
            {
                p10 = .5*(C4+C7);
            }
            p11 = C4;
        }
        else if(c4 == c5)
        {
            p10 = C4;
            p11 = C4;
        }
        else
        {
            // Each sign() term is 0 when the key equals both listed
            // neighbours and 1 otherwise; c4 terms add, c5 terms subtract.
            float r = sign(abs(c4-c3)+abs(c4-c1))+sign(abs(c4-d4)+abs(c4-c2))
                     +sign(abs(c4-c6)+abs(c4-d1))+sign(abs(c4-d5)+abs(c4-d2))
                     -sign(abs(c5-c3)+abs(c5-c1))-sign(abs(c5-d4)+abs(c5-c2))
                     -sign(abs(c5-c6)+abs(c5-d1))-sign(abs(c5-d5)+abs(c5-d2));
            p10 = .5*(C4+C7);
            if(r > 0.)
            {
                p11 = C4;
            }
            else if(r < 0.)
            {
                p11 = C5;
            }
            else
            {
                p11 = .25*(C4+C5+C7+C8);
            }
        }
    }
    else if(distAnti <= th)
    {
        if(c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
        {
            p10 = C7;
        }
        else
        {
            p10 = 0.5*(C4+C7);
        }
        p11 = C5;
    }
    else
    {
        p11 = 0.25*(C4+C5+C7+C8);
        if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
        {
            p10 = C4;
        }
        else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
        {
            p10 = C7;
        }
        else
        {
            p10 = 0.5*(C4+C7);
        }
    }

    // Distributing the final products

    vec3 outColor = C4;
    if(quad.x >= .5 && quad.y >= .5)
    {
        outColor = p11;
    }
    else if(quad.x >= .5 || quad.y >= .5)
    {
        outColor = p10;
    }
    gl_FragColor.xyz = outColor;
}




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 10.05.2007 - 20:35Jump to top Quote this post Report this post Edit Delete


I can't see any difference in Chrono Trigger - perhaps I should test a different game. Here's a version I was working on: I included your experimental changes along with some of my own. The GET_RESULT function is back, because I did some testing and I'm sure the previous version didn't behave the same way as the original (although I didn't see any difference, to be fair..); this slows the filter down a bit, but I also made it use vec4 instead of vec3, which speeds it up quite a bit. Found that out on accident. In theory the alpha channel is getting 2xSaId now, so you might want to check a few games to see if everything still looks right. (I don't include it in the calculations though)

Fragment file:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
 

/*
  2xSaI GLSL shader

           - Copyright (C) 2007 guest(r) - guest.r@gmail.com

           - License: GNU-GPL

           - Enhanced by VerGreeneyes (10.5.2007)

           - (Experimental version - modified color "equality")


  The 2xSaI algorithm

           - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
*/

// Threshold that main() compares the summed per-channel differences against.
const float th = .0625; // 2 * 1/2^5 tolerance (2 singletons in RGB555)
// Per-channel weights used by reduce(); the alpha weight is 0, so alpha does
// not contribute to the comparison keys.
const vec4 dt = vec4(65536.,256.,1.,0.);
// dot(abs(a-b),dp) sums the absolute R, G and B differences; alpha weight 0.
const vec4 dp = vec4(1.,1.,1.,0.);
// Read by main(): OGL2Size.xy scales gl_TexCoord[0] into texel units and
// OGL2InvSize.xy is used as the one-texel sampling step. Presumably set by
// the host application - not visible in this shader.
uniform vec4 OGL2Size, OGL2InvSize;
// Image sampled by every texture2D() call in main().
uniform sampler2D OGL2Texture;

// reduce(color): folds a colour into one float key via dot(color,dt).
#define reduce(color)(dot(color,dt))

// Votes between A and B using the pair (C, D):
//   +1.0 when A matches neither C nor D while B matches both,
//   -1.0 when A matches both C and D,
//    0.0 otherwise.
float GET_RESULT(float A,float B,float C,float D)
{
    bool aBoth = (A == C && A == D);
    bool aNone = (A != C && A != D);
    bool bBoth = (B == C && B == D);
    return float(aNone && bBoth)-float(aBoth);
}

// Fragment entry point of the experimental vec4 2xSaI variant.
// Samples 14 texels (all four channels) around the current source texel.
// The two top-level pair tests (C4 vs C8, C5 vs C7) use a summed channel
// distance compared against th; inner tests compare reduce() keys. Writes
// the centre texel (C4) or one of the products p10 / p11 to gl_FragColor,
// depending on which quarter of the texel the fragment falls in.
void main()
{

    // Calculating texel coordinates

    vec2 texPos = gl_TexCoord[0].xy*OGL2Size.xy;   // position in texel units
    vec2 quad   = fract(texPos);                   // position inside the texel
    vec2 diag   = vec2( OGL2InvSize.x,OGL2InvSize.y);
    vec2 anti   = vec2(-OGL2InvSize.x,OGL2InvSize.y);

    // The second step direction is negated when quad.x >= .5 and quad.y < .5.
    if(quad.x >= .5 && quad.y < .5)
    {
        anti = -anti;
    }

    vec2 pC4  = floor(texPos)/OGL2Size.xy;
    vec2 side = .5*(diag-anti);
    vec2 pC8  = pC4+diag;


    // Reading the texels

    vec4 C0 = texture2D(OGL2Texture,pC4-diag        );
    vec4 C1 = texture2D(OGL2Texture,pC4-anti-side   );
    vec4 C2 = texture2D(OGL2Texture,pC4-anti        );
    vec4 C3 = texture2D(OGL2Texture,pC4-side        );
    vec4 C4 = texture2D(OGL2Texture,pC4             );
    vec4 C5 = texture2D(OGL2Texture,pC4+side        );
    vec4 C6 = texture2D(OGL2Texture,pC4+anti        );
    vec4 C7 = texture2D(OGL2Texture,pC8-side        );
    vec4 C8 = texture2D(OGL2Texture,pC8             );
    vec4 D0 = texture2D(OGL2Texture,pC4+2.*anti+side);
    vec4 D1 = texture2D(OGL2Texture,pC8+anti        );
    vec4 D2 = texture2D(OGL2Texture,pC8+diag-side   );
    vec4 D4 = texture2D(OGL2Texture,pC8-anti        );
    vec4 D5 = texture2D(OGL2Texture,pC8+side        );
    vec4 p10;
    vec4 p11;

    // Scalar comparison keys, one per sampled colour.
    float c0 = reduce(C0);
    float c1 = reduce(C1);
    float c2 = reduce(C2);
    float c3 = reduce(C3);
    float c4 = reduce(C4);
    float c5 = reduce(C5);
    float c6 = reduce(C6);
    float c7 = reduce(C7);
    float c8 = reduce(C8);
    float d0 = reduce(D0);
    float d1 = reduce(D1);
    float d2 = reduce(D2);
    float d4 = reduce(D4);
    float d5 = reduce(D5);

    /*              SaI code               */
    /*  Copied from the Dosbox source code        */
    /*  Copyright (C) 2002-2007  The DOSBox Team  */
    /*  License: GNU-GPL                          */
    /*  Adapted by guest(r) on 20.4 and 9.5. 2007 */
    /*  and VerGreeneyes (10.5.2007)              */


    // Summed absolute channel differences of the two pairs.
    float distDiag = dot(abs(C4-C8),dp);
    float distAnti = dot(abs(C5-C7),dp);

    if(distDiag <= th)
    {
        if(distAnti > th)
        {
            if(c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
            {
                p10 = C4;
            }
            else
            {
                p10 = .5*(C4+C7);
            }
            p11 = C4;
        }
        else if(c4 == c5)
        {
            p10 = C4;
            p11 = C4;
        }
        else
        {
            // Vote between C4 and C5 using four neighbouring pairs.
            float r = GET_RESULT(c4,c5,c3,c1)+GET_RESULT(c4,c5,d5,d2)
                     -GET_RESULT(c5,c4,d4,c2)-GET_RESULT(c5,c4,c6,d1);
            p10 = .5*(C4+C7);
            if(r > 0.)
            {
                p11 = C4;
            }
            else if(r < 0.)
            {
                p11 = C5;
            }
            else
            {
                p11 = .25*(C4+C5+C7+C8);
            }
        }
    }
    else if(distAnti <= th)
    {
        if(c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
        {
            p10 = C7;
        }
        else
        {
            p10 = 0.5*(C4+C7);
        }
        p11 = C5;
    }
    else
    {
        p11 = 0.25*(C4+C5+C7+C8);
        if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0)
        {
            p10 = C4;
        }
        else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0)
        {
            p10 = C7;
        }
        else
        {
            p10 = 0.5*(C4+C7);
        }
    }

    // Distributing the final products

    vec4 outColor = C4;
    if(quad.x >= .5 && quad.y >= .5)
    {
        outColor = p11;
    }
    else if(quad.x >= .5 || quad.y >= .5)
    {
        outColor = p10;
    }
    gl_FragColor = outColor;
}


[Dieser Beitrag wurde am 10.05.2007 - 20:36 von VerGreeneyes aktualisiert]




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 10.05.2007 - 22:44Jump to top Quote this post Report this post Edit Delete


Hmm, I see you removed your post. Was the experimental build not working right? Also I was wondering, how big are the minute variations in colour we have to take into account? If it's just rounding errors, that can be solved by rounding the input values though it also kinda kills the speed.

Edit: just an update, here's a version I made that rounds the values. I found a few ways to speed things up a little since the last version so the speed hit isn't so big.. but I don't know if it changes anything, because I have no scenes to check it in! So let me know if this helps make the filter less glitchy, or if a more drastic approach is needed (like yours).

Fragment file:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
 
/*
  2xSaI GLSL shader

           - Copyright (C) 2007 guest(r) - guest.r@gmail.com

           - License: GNU-GPL

           - Enhanced by Ver Greeneyes (10.5.2007)

  The 2xSaI algorithm

           - Copyright (c) 1999-2001 by Derek Liauw Kie Fa.
*/

// Per-channel weights used by reduce(); here alpha also contributes, with
// weight 1/256.
// NOTE(review): main() scales channels to roughly 0..256 before reducing, so
// a key can need more significant bits than a 32-bit float mantissa holds and
// the alpha term may be lost to rounding - verify on the target hardware.
const vec4 dt = vec4(65536.,256.,1.,.00390625);
// Read by main(): OGL2Size.xy scales gl_TexCoord[0] into texel units and
// OGL2InvSize.xy is used as the one-texel sampling step. Presumably set by
// the host application - not visible in this shader.
uniform vec4 OGL2Size, OGL2InvSize;
// Image sampled by every texture2D() call in main().
uniform sampler2D OGL2Texture;

// reduce(color): folds a colour into one float key via dot(color,dt).
#define reduce(color)(dot(color,dt))

// Votes between A and B using the pair (C, D):
//   -1.0 when A matches both C and D,
//   +1.0 when A matches neither C nor D while B matches both,
//    0.0 otherwise.
float GET_RESULT(float A,float B,float C,float D)
{
    if(A == C && A == D)
    {
        return -1.;
    }
    if(A != C && A != D && B == C && B == D)
    {
        return 1.;
    }
    return 0.;
}

// Fragment entry point of the rounding 2xSaI variant.
// Every sample is quantised to whole levels before comparison so that minute
// variations in the fetched values cannot break the exact key comparisons.
// The result is scaled back to 0..1 when written to gl_FragColor.
//
// Fix: the quantisation scale is 255 instead of 256. A normalised 8-bit
// channel holds k/255, so 256*k/255 puts the codes 127 and 128 almost exactly
// on the rounding boundary (127.498 and 128.502), where the smallest error
// flips the result. With 255 every code lands on an integer with the full
// +/-0.5 margin, and unblended pixels are written back as exactly k/255.
// NOTE(review): assumes an 8-bit-per-channel source texture - confirm.
void main()
{

    // Calculating texel coordinates

    vec2 OGL2Pos = gl_TexCoord[0].xy*OGL2Size.xy,
         fp = fract(OGL2Pos),
         g1 = vec2( OGL2InvSize.x,OGL2InvSize.y),
         g2 = vec2(-OGL2InvSize.x,OGL2InvSize.y);

    // The second step direction is negated when fp.x >= .5 and fp.y < .5.
    if(fp.x >= .5 && fp.y < .5) g2=-g2;

    vec2 pC4 = floor(OGL2Pos)/OGL2Size.xy,
         g3 = .5*(g1-g2), pC8 = pC4+g1, g4 = g2+g3;

    // Number of steps between 0.0 and 1.0 in a normalised 8-bit channel.
    float levels = 255.;


    // Reading the texels (rounded to the nearest whole level)

    vec4 C0 = floor(levels*texture2D(OGL2Texture,pC4-g1   )+.5),
         C1 = floor(levels*texture2D(OGL2Texture,pC4-g4   )+.5),
         C2 = floor(levels*texture2D(OGL2Texture,pC4-g2   )+.5),
         C3 = floor(levels*texture2D(OGL2Texture,pC4-g3   )+.5),
         C4 = floor(levels*texture2D(OGL2Texture,pC4      )+.5),
         C5 = floor(levels*texture2D(OGL2Texture,pC4+g3   )+.5),
         C6 = floor(levels*texture2D(OGL2Texture,pC4+g2   )+.5),
         C7 = floor(levels*texture2D(OGL2Texture,pC8-g3   )+.5),
         C8 = floor(levels*texture2D(OGL2Texture,pC8      )+.5),
         D0 = floor(levels*texture2D(OGL2Texture,pC4+g2+g4)+.5),
         D1 = floor(levels*texture2D(OGL2Texture,pC8+g2   )+.5),
         D2 = floor(levels*texture2D(OGL2Texture,pC8+g1-g3)+.5),
         D4 = floor(levels*texture2D(OGL2Texture,pC8-g2   )+.5),
         D5 = floor(levels*texture2D(OGL2Texture,pC8+g3   )+.5),
         p10,p11;

    // Scalar comparison keys, one per sampled colour.
    float c0 = reduce(C0),c1 = reduce(C1),c2 = reduce(C2),c3 = reduce(C3),
          c4 = reduce(C4),c5 = reduce(C5),c6 = reduce(C6),c7 = reduce(C7),
          c8 = reduce(C8),d0 = reduce(D0),d1 = reduce(D1),d2 = reduce(D2),
          d4 = reduce(D4),d5 = reduce(D5);


    /*              SaI code               */
    /*  Copied from the Dosbox source code        */
    /*  Copyright (C) 2002-2007  The DOSBox Team  */
    /*  License: GNU-GPL                          */
    /*  Adapted by guest(r) on 20.4 and 9.5. 2007 */
    /*  and Ver Greeneyes (10.5.2007)             */

    if(c4 == c8)
    {
        if(c5 != c7)
        {
            p10 = (c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : .5*(C4+C7);
            p11 = C4;
        }
        else
        {
            if(c4 == c5) p11 = (p10 = C4);
            else
            {
                // Vote between C4 and C5 using four neighbouring pairs.
                float r = GET_RESULT(c4,c5,c3,c1)+GET_RESULT(c4,c5,d5,d2)
                         -GET_RESULT(c5,c4,d4,c2)-GET_RESULT(c5,c4,c6,d1);
                p10 = .5*(C4+C7);
                if(r > 0.) p11 = C4;
                else if(r < 0.) p11 = C5;
                else p11 = .25*(C4+C5+C7+C8);
            }
        }
    }
    else if(c5 == c7)
    {
        p10 = (c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
        p11 = C5;
    }
    else
    {
        p11 = 0.25*(C4+C5+C7+C8);
        if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) p10 = C4;
        else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) p10 = C7;
        else p10 = 0.5*(C4+C7);
    }

    // Distributing the final products (scaled back to the 0..1 range)

    if(fp.x >= .5 && fp.y >= .5) gl_FragColor = p11/levels;
    else if(fp.x >= .5 || fp.y >= .5) gl_FragColor = p10/levels;
    else gl_FragColor = C4/levels;
}


PS: If you want to test the speed, taking out the rounding changes should be simple enough. (just don't forget to take out the divisions at the bottom!)

[Dieser Beitrag wurde am 11.05.2007 - 01:15 von VerGreeneyes aktualisiert]




More : [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]

Similarly threads:
Topics Created by Replies Boardname
Cartoon GLSL shader guest 7 pete_bernert
Is there a OGL2 2xSaI Shader out there? GreenImp 0 pete_bernert
Scale2x Plus GLSL shader guest 0 pete_bernert
SuperEagle GLSL shader guest 0 pete_bernert
Super2xSaI GLSL shader guest 0 pete_bernert
Neuer Thread ...





Masthead

This forum is a free service of razyboard.com powered by:
Geizkragen Price Comparison. Top product in the price comparison: Krups Nespresso Essenza (XN2001)
Do you want a free forum in less than two minutes? Then click here!



Verwandte Suchbegriffe:
2xsai algorithm | glsl simple 2x scaler | glsl chrono | ogl2 vsync not working
blank