Pete's Messageboard... No ISO/BIOS requests!

Homepage Members Register Login Search Old board


Neuer Thread ...
More : [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]


AuthorTopics » Book an abo for this threadClose Thread Move Thread Fix the thread Print view Delete this thread

VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 14.05.2007 - 16:59Jump to top Quote this post Report this post Edit Delete


Hmm, well I should be able to do some optimisation on that front; don't expect to get anything approaching full speed though, this is always going to be at least four times slower than the 2xSaI filter by itself. (and there's some additional overhead) I fixed the lines issue though, and made it a bit smaller too.. maybe this version will work on your card (though I doubt there's much of a difference).

Fragment file:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
 
// Uniforms set by the host application.
uniform vec4 OGL2Size;          // main() multiplies gl_TexCoord[0].xy by .xy to get texel units
uniform vec4 OGL2InvSize;       // .x / .y are used as one-texel steps in texture coordinates
uniform sampler2D OGL2Texture;  // source image

// NOTE: despite its name this literal is pi/2, not pi. main() evaluates
// -cos(pi*(c+1.)) for c in [0,1]; with pi/2 that argument runs from pi/2 to pi,
// so the result runs from 0 to 1.
const float pi = 1.570796326794896619231321691640;
const vec4 vec1 = vec4(1.0);
// Weights 256^3, 256^2, 256, 1 used by reduce() to fold a colour into one float.
const vec4 dt = vec4(16777216.0, 65536.0, 256.0, 1.0);

// Forward declarations; the definitions follow main().
float reduce(vec4 colour);
float GET_RESULT(float A, float B, float C, float D);
vec4 xSaI(vec2 fp, vec2 pC4);

// Fragment entry point.
//
// Evaluates xSaI() for four neighbouring sample positions and blends the four
// results with distance-based, cosine-shaped weights.
void main(){
    // OGL2Pos: position in texel units; fp: position inside the current texel;
    // dx/dy: one-texel steps; pC4: coordinate of the current texel's corner.
    // s0..s3 are 0/1 selector vectors handed to xSaI(); each picks which of
    // C4 / p10 / p11 that call returns.
    vec2 OGL2Pos = gl_TexCoord[0].xy*OGL2Size.xy,
         fp = fract(OGL2Pos),
         dx = vec2(OGL2InvSize.x,0.),
         dy = vec2(0.,OGL2InvSize.y),
         pC4 = floor(OGL2Pos)/OGL2Size.xy,
         s0 = vec2(float(fp.x< .5),float(fp.y< .5)),
         s1 = vec2(float(fp.x< .5),float(fp.y>=.5)),
         s2 = vec2(float(fp.x>=.5),float(fp.y< .5)),
         s3 = vec2(float(fp.x>=.5),float(fp.y>=.5));
    // Fold fp into [0,.5] per axis; a folded axis reverses its step direction.
    if(fp.x >= .5){fp.x = 1. - fp.x; dx = -dx;}
    if(fp.y >= .5){fp.y = 1. - fp.y; dy = -dy;}
    // Rescale to [0,1] and fold once more; an axis folded here gets a zero step.
    fp *= 2.;
    if(fp.x >= .5){fp.x = 1. - fp.x; dx = vec2(0.);}
    if(fp.y >= .5){fp.y = 1. - fp.y; dy = vec2(0.);}
    // One xSaI() result per column of C.
    mat4 C = mat4(xSaI(s0,pC4-dx-dy),xSaI(s1,pC4-dx),
                  xSaI(s2,pC4-dy),xSaI(s3,pC4));
    // gp[0] = (fp+.5)^2 and gp[1] = (fp-.5)^2, component-wise.
    mat2 gp = mat2((fp+.5)*(fp+.5),(fp-.5)*(fp-.5));
    // Squared distances from fp to (-.5,-.5), (-.5,.5), (.5,-.5) and (.5,.5).
    // FIX: the closing parenthesis of this constructor was missing, which made
    // the shader fail to compile.
    vec4 c = vec4(gp[0][0]+gp[0][1],gp[0][0]+gp[1][1],
                  gp[1][0]+gp[0][1],gp[1][0]+gp[1][1]);
    // Weight = 1 - distance, clamped at zero ...
    c = vec1 - sqrt(c);
    c *= vec4(greaterThan(c,vec4(0.)));
    // ... then shaped with a cosine (the constant named pi holds pi/2, so this
    // maps [0,1] onto [0,1]).
    c = -cos(pi*(c+vec1));
    // Weighted average of the four xSaI() results.
    gl_FragColor = (C[0]*c.x+C[1]*c.y+C[2]*c.z+C[3]*c.w)/(c.x+c.y+c.z+c.w);
}

// Folds a colour into a single float by taking its dot product with the
// file-level weight vector dt. Callers compare the results with == / != to
// decide whether two texels have the same colour.
float reduce(vec4 colour){
    float key = dot(colour,dt);
    return key;
}

// Votes between candidates A and B using the two reference values C and D.
// Returns  1. when A matches neither reference while B matches both,
//         -1. when A matches both references,
//          0. otherwise.
float GET_RESULT(float A, float B, float C, float D){
    bool aMatchesBoth = (A == C) && (A == D);
    bool aMatchesNone = (A != C) && (A != D);
    bool bMatchesBoth = (B == C) && (B == D);
    return float(aMatchesNone && bMatchesBoth) - float(aMatchesBoth);
}

// Samples a 14-texel neighbourhood around pC4 and applies the xSaI edge rules
// to produce one colour.
//
// fp  : selector; main() passes vectors whose components are 0. or 1.
//       Both components >= .5 -> returns p11; exactly one -> returns p10;
//       neither -> returns the unmodified centre texel C4.
// pC4 : texture coordinate of the centre texel.
//
// Sample offsets from pC4 in texels (x,y) when g2 is not negated:
//   C0(-1,-1) C1( 0,-1) C2( 1,-1)
//   C3(-1, 0) C4( 0, 0) C5( 1, 0) D4( 2, 0)
//   C6(-1, 1) C7( 0, 1) C8( 1, 1) D5( 2, 1)
//   D0(-1, 2) D1( 0, 2) D2( 1, 2)
// When fp.x >= .5 && fp.y < .5 the x and y offsets of every sample are swapped.
vec4 xSaI(vec2 fp, vec2 pC4){
    vec4 rValue;
    // g1: one texel along the main diagonal; g2: one texel along the anti-diagonal.
    vec2 g1 = vec2( OGL2InvSize.x,OGL2InvSize.y),
         g2 = vec2(-OGL2InvSize.x,OGL2InvSize.y);
    // Negating g2 transposes the whole sampling pattern (see header).
    if(fp.x >= .5 && fp.y < .5) g2=-g2;
    // g3 and g4 are the two axis-aligned one-texel steps derived from g1/g2;
    // pC8 is the diagonal neighbour of pC4.
    vec2 g3 = .5*(g1-g2), pC8 = pC4+g1, g4 = g2+g3;
    vec4 C0 = texture2D(OGL2Texture,pC4-g1   ),
         C1 = texture2D(OGL2Texture,pC4-g4   ),
         C2 = texture2D(OGL2Texture,pC4-g2   ),
         C3 = texture2D(OGL2Texture,pC4-g3   ),
         C4 = texture2D(OGL2Texture,pC4      ),
         C5 = texture2D(OGL2Texture,pC4+g3   ),
         C6 = texture2D(OGL2Texture,pC4+g2   ),
         C7 = texture2D(OGL2Texture,pC8-g3   ),
         C8 = texture2D(OGL2Texture,pC8      ),
         D0 = texture2D(OGL2Texture,pC4+g2+g4),
         D1 = texture2D(OGL2Texture,pC8+g2   ),
         D2 = texture2D(OGL2Texture,pC8+g1-g3),
         D4 = texture2D(OGL2Texture,pC8-g2   ),
         D5 = texture2D(OGL2Texture,pC8+g3   ),
         p10,p11;
    // Scalar keys for the colours; all comparisons below are on these keys.
    float c0 = reduce(C0),c1 = reduce(C1),c2 = reduce(C2),c3 = reduce(C3),
          c4 = reduce(C4),c5 = reduce(C5),c6 = reduce(C6),c7 = reduce(C7),
          c8 = reduce(C8),d0 = reduce(D0),d1 = reduce(D1),d2 = reduce(D2),
          d4 = reduce(D4),d5 = reduce(D5);
    // Case 1: the C4-C8 diagonal has a single colour.
    if(c4 == c8){
        if(c5 != c7){
            // Only the C4-C8 diagonal matches: p11 takes C4, p10 is C4 or a C4/C7 mix.
            p10 = (c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : .5*(C4+C7);
            p11 = C4;
        }else{
            // Both diagonals match.
            if(c4 == c5) p11 = (p10 = C4);
            else{
                // Two different diagonal colours: vote using the surrounding texels.
                float r = GET_RESULT(c4,c5,c3,c1)+GET_RESULT(c4,c5,d5,d2)
                         -GET_RESULT(c5,c4,d4,c2)-GET_RESULT(c5,c4,c6,d1);
                p10 = .5*(C4+C7);
                if(r > 0.) p11 = C4;
                else if(r < 0.) p11 = C5;
                else p11 = .25*(C4+C5+C7+C8);
            }
        }
    // Case 2: only the C5-C7 diagonal has a single colour.
    }else if(c5 == c7){
        p10 = (c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
        p11 = C5;
    // Case 3: neither diagonal matches; p11 is the average of the 2x2 block.
    }else{
        p11 = 0.25*(C4+C5+C7+C8);
        if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) p10 = C4;
        else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) p10 = C7;
        else p10 = 0.5*(C4+C7);
    }
    // Select the output according to the selector fp (see header).
    if(fp.x >= .5 && fp.y >= .5) rValue = p11;
    else if(fp.x >= .5 || fp.y >= .5) rValue = p10;
    else rValue = C4;
    return rValue;
}


Edit: current version does 14*4 = 56 texture lookups. I should be able to cut that down to 35 at least.

[Dieser Beitrag wurde am 14.05.2007 - 17:37 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 14.05.2007 - 18:16Jump to top Quote this post Report this post Edit Delete


This code could hopefully work with a reduced set of lookups and with 4x2 branches of the xSaI func. (with 4 calls realized and with colors as input data).

The code

Code:
1:
2:
3:
4:
 

    mat4 C = mat4(xSaI(s0,pC4-dx-dy),xSaI(s1,pC4-dx),
                  xSaI(s2,pC4-dy),xSaI(s3,pC4));

points to a max. 1 texel extending to the left and up.
I think it can be done with 25 lookups, but I don't know what amount of additional spam code is actually generated by the ATI compiler.

[Dieser Beitrag wurde am 15.05.2007 - 17:28 von guest aktualisiert]




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 14.05.2007 - 21:04Jump to top Quote this post Report this post Edit Delete


Well, it turns out GLSL is mean when it comes to arrays.. it errors out on me and I don't know how to fix it. So there goes the option I wanted to try *sigh*

Edit: here's what I was trying:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
 
uniform vec4 OGL2Size, OGL2InvSize;
uniform sampler2D OGL2Texture;
// NOTE: despite its name this literal is pi/2, not pi (see its use in main()).
const float pi = 1.570796326794896619231321691640;
const vec4 vec1 = vec4(1.);
const vec4 dt = vec4(16777216.,65536.,256.,1.);

// Per-fragment state, filled in at the top of main().
// FIX: GLSL 1.10 requires global initialisers to be constant expressions and
// has no array initialiser syntax, so these can no longer be initialised at
// file scope from gl_TexCoord, a uniform or texture2D().
vec2 pC4;       // texture coordinate of the current fragment
vec2 dx, dy;    // one-texel steps along x and y
// Texel neighbourhood of pC4, row-major with 6 columns:
// T[6*(j+2) + (i+2)] = texel at pC4 + i*dx + j*dy, for i in -2..3 and j in -2..3.
// The last row stops at i = 2, hence 35 entries; T[14] is the centre texel.
vec4 T[35];

float reduce(vec4);
float GET_RESULT(float, float, float, float);
vec4 xSaI(vec2, int);

// Fragment entry point.
//
// Fetches the neighbourhood into T once, evaluates xSaI() for four table
// offsets and blends the four results with distance-based, cosine-shaped
// weights.
void main(){
    pC4 = gl_TexCoord[0].xy;
    dx = vec2(OGL2InvSize.x,0.);
    dy = vec2(0.,OGL2InvSize.y);

    // Fill T before dx/dy are modified below. Same 35 lookups, in the same
    // order, as the original initialiser list.
    for(int row = 0; row < 5; ++row)
        for(int col = 0; col < 6; ++col)
            T[6*row+col] = texture2D(OGL2Texture,pC4+float(col-2)*dx+float(row-2)*dy);
    for(int col = 0; col < 5; ++col)
        T[30+col] = texture2D(OGL2Texture,pC4+float(col-2)*dx+3.*dy);

    // fp: position inside the current texel; s0..s3: 0/1 selector vectors
    // handed to xSaI().
    vec2 fp = fract(pC4*OGL2Size.xy),
         s0 = vec2(float(fp.x< .5),float(fp.y< .5)),
         s1 = vec2(float(fp.x< .5),float(fp.y>=.5)),
         s2 = vec2(float(fp.x>=.5),float(fp.y< .5)),
         s3 = vec2(float(fp.x>=.5),float(fp.y>=.5));
    // Fold fp into [0,.5] per axis; a folded axis reverses its step direction.
    if(fp.x >= .5){fp.x = 1. - fp.x; dx = -dx;}
    if(fp.y >= .5){fp.y = 1. - fp.y; dy = -dy;}
    // Rescale to [0,1] and fold once more; an axis folded here gets a zero step.
    fp *= 2.;
    if(fp.x >= .5){fp.x = 1. - fp.x; dx = vec2(0.);}
    if(fp.y >= .5){fp.y = 1. - fp.y; dy = vec2(0.);}
    // The int argument is a table offset into T: one column (+-1) per x step
    // and one row (+-6) per y step, taken from the signs of -dx.x and -dy.y.
    mat4 C = mat4(xSaI(s0,int(sign(-dx.x)+6.*sign(-dy.y))),
                  xSaI(s1,int(sign(-dx.x)               )),
                  xSaI(s2,int(            6.*sign(-dy.y))),
                  xSaI(s3,0                              ));
    // gp[0] = (fp+.5)^2 and gp[1] = (fp-.5)^2, component-wise.
    mat2 gp = mat2((fp+.5)*(fp+.5),(fp-.5)*(fp-.5));
    // Squared distances from fp to (-.5,-.5), (-.5,.5), (.5,-.5) and (.5,.5).
    // FIX: the closing parenthesis of this constructor was missing.
    vec4 c = vec4(gp[0][0]+gp[0][1],gp[0][0]+gp[1][1],
                  gp[1][0]+gp[0][1],gp[1][0]+gp[1][1]);
    // Weight = 1 - distance, clamped at zero, then shaped with a cosine.
    c = vec1 - sqrt(c);
    c *= vec4(greaterThan(c,vec4(0.)));
    c = -cos(pi*(c+vec1));
    // Weighted average of the four xSaI() results.
    gl_FragColor = (C[0]*c.x+C[1]*c.y+C[2]*c.z+C[3]*c.w)/(c.x+c.y+c.z+c.w);
}

// Folds a colour into a single float by taking its dot product with the
// file-level weight vector dt. Callers compare the results with == / != to
// decide whether two texels have the same colour.
float reduce(vec4 colour){
    float key = dot(colour,dt);
    return key;
}

// Votes between candidates A and B using the two reference values C and D.
// Returns  1. when A matches neither reference while B matches both,
//         -1. when A matches both references,
//          0. otherwise.
float GET_RESULT(float A, float B, float C, float D){
    bool aMatchesBoth = (A == C) && (A == D);
    bool aMatchesNone = (A != C) && (A != D);
    bool bMatchesBoth = (B == C) && (B == D);
    return float(aMatchesNone && bMatchesBoth) - float(aMatchesBoth);
}

// Table-driven variant of xSaI: reads its 14-texel neighbourhood from the
// file-level array T instead of issuing its own texture lookups, then applies
// the xSaI edge rules to produce one colour.
//
// fp : selector; main() passes vectors whose components are 0. or 1.
//      Both components >= .5 -> returns p11; exactly one -> returns p10;
//      neither -> returns the unmodified centre texel C4.
// v  : offset added to every index into T (T is row-major with 6 columns and
//      T[14] is the centre, so +-1 moves one column and +-6 one row).
//
// Sample offsets from the centre in texels (x,y) for the untransposed pattern:
//   C0(-1,-1) C1( 0,-1) C2( 1,-1)
//   C3(-1, 0) C4( 0, 0) C5( 1, 0) D4( 2, 0)
//   C6(-1, 1) C7( 0, 1) C8( 1, 1) D5( 2, 1)
//   D0(-1, 2) D1( 0, 2) D2( 1, 2)
// When fp.x >= .5 && fp.y < .5 the x and y offsets of every sample are swapped.
vec4 xSaI(vec2 fp, int v){
    vec4 rValue;
    vec4 C0,C1,C2,C3,C4,C5,C6,C7,C8,D0,D1,D2,D4,D5;
    // Samples on the main diagonal are the same in both patterns.
    C0 = T[ 7+v];
    C4 = T[14+v];
    C8 = T[21+v];
    if(fp.x >= .5 && fp.y < .5){
         // Transposed pattern.
         C1 = T[13+v];
         C2 = T[19+v];
         C3 = T[ 8+v];
         C5 = T[20+v];
         C6 = T[ 9+v];
         C7 = T[15+v];
         D0 = T[10+v];
         D1 = T[16+v];
         D2 = T[22+v];
         D4 = T[26+v];
         // FIX: D5 is at (1,2) here, i.e. T[27+v]. It used to share T[22+v]
         // with the untransposed case, which made it a duplicate of D2.
         D5 = T[27+v];
    }else{
         // Untransposed pattern.
         C1 = T[ 8+v];
         C2 = T[ 9+v];
         C3 = T[13+v];
         C5 = T[15+v];
         C6 = T[19+v];
         C7 = T[20+v];
         // FIX: D0 is at (-1,2), i.e. T[25+v]; T[22+v] is D5's texel.
         D0 = T[25+v];
         D1 = T[26+v];
         D2 = T[27+v];
         D4 = T[16+v];
         D5 = T[22+v];
    }
    vec4 p10,p11;
    // Scalar keys for the colours; all comparisons below are on these keys.
    float c0 = reduce(C0),c1 = reduce(C1),c2 = reduce(C2),c3 = reduce(C3),
          c4 = reduce(C4),c5 = reduce(C5),c6 = reduce(C6),c7 = reduce(C7),
          c8 = reduce(C8),d0 = reduce(D0),d1 = reduce(D1),d2 = reduce(D2),
          d4 = reduce(D4),d5 = reduce(D5);
    // Case 1: the C4-C8 diagonal has a single colour.
    if(c4 == c8){
        if(c5 != c7){
            p10 = (c4 == c3 && c7 == d2 || c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) ? C4 : .5*(C4+C7);
            p11 = C4;
        }else{
            if(c4 == c5) p11 = (p10 = C4);
            else{
                // Two different diagonal colours: vote using the surrounding texels.
                float r = GET_RESULT(c4,c5,c3,c1)+GET_RESULT(c4,c5,d5,d2)
                         -GET_RESULT(c5,c4,d4,c2)-GET_RESULT(c5,c4,c6,d1);
                p10 = .5*(C4+C7);
                if(r > 0.) p11 = C4;
                else if(r < 0.) p11 = C5;
                else p11 = .25*(C4+C5+C7+C8);
            }
        }
    // Case 2: only the C5-C7 diagonal has a single colour.
    }else if(c5 == c7){
        p10 = (c7 == c6 && c4 == c2 || c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) ? C7 : 0.5*(C4+C7);
        p11 = C5;
    // Case 3: neither diagonal matches; p11 is the average of the 2x2 block.
    }else{
        p11 = 0.25*(C4+C5+C7+C8);
        if(c4 == c5 && c4 == c6 && c3 != c7 && c7 == d0) p10 = C4;
        else if(c7 == c3 && c7 == c8 && c4 != c6 && c4 == c0) p10 = C7;
        else p10 = 0.5*(C4+C7);
    }
    // Select the output according to the selector fp (see header).
    if(fp.x >= .5 && fp.y >= .5) rValue = p11;
    else if(fp.x >= .5 || fp.y >= .5) rValue = p10;
    else rValue = C4;
    return rValue;
}


[Dieser Beitrag wurde am 14.05.2007 - 21:19 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 15.05.2007 - 09:51Jump to top Quote this post Report this post Edit Delete


The GLSpec book v.1.10 states (page 21) that there is no mechanism for initializing arrays at declaration time from within a shader.

If the upper code works for you that means nVidia guys take "black, but slim possibility of gray" as "we'll make it white". Shame on them.

Indexing an array with an integer variable (still within boundaries) crashes the app.




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 15.05.2007 - 13:22Jump to top Quote this post Report this post Edit Delete


Yeah, I know the code doesn't work, although I was curious whether or not it would work with the ATI compiler. To be honest I'm not sure how to go about reducing the amount of texture lookups now (at least without -completely- killing performance)

Edit: hang on, you can't even -initialise- it? Jeez, they really don't like arrays huh..

[Dieser Beitrag wurde am 15.05.2007 - 13:24 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 15.05.2007 - 15:46Jump to top Quote this post Report this post Edit Delete


I guess there are two more options left...

1. Wait for the multipass feature (a nice adaptable implementation would be cool )
2. Try to think it out. (much work)


Or you can try something new.




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 15.05.2007 - 17:41Jump to top Quote this post Report this post Edit Delete


A multipass solution would be great, something like a list of filters with the amount of upscaling they're allowed to do.. *drool* I'm also hoping the SNES emulator bsnes will implement OGL2 soonish; the author (byuu) has said he really likes the idea but doesn't really know where to start.




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 15.05.2007 - 20:15Jump to top Quote this post Report this post Edit Delete


As i recall VBA has OGL support.
Peeking into its source could reveal a mystery or two.
Nevertheless at least a 2 pass support would be fine - with a simple linear last shader to fit the image.




VerGreeneyes 
Strong supporter
......

...

Status:Offline
Date registered: 26.04.2007
Post:89
Send Message
...   Created on 15.05.2007 - 20:42Jump to top Quote this post Report this post Edit Delete


Yeah, it's really just single pass that's a pain. By the way, I've been thinking a bit about the problem of reducing the amount of texture lookups used, and one possibility I see must distinguish between nine distinct scenarios:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
 
dx.x < 0.0 && dy.y < 0.0,
dx.x < 0.0 && dy.y == 0.0,
dx.x < 0.0 && dy.y > 0.0,
dx.x == 0.0 && dy.y < 0.0,
dx.x == 0.0 && dy.y == 0.0,
dx.x == 0.0 && dy.y > 0.0,
dx.x > 0.0 && dy.y < 0.0,
dx.x > 0.0 && dy.y == 0.0 and
dx.x > 0.0 && dy.y > 0.0.
Implementing this would obviously result in a big speed loss, but maybe it's worth it.. doing so might point the way to better solutions, as well.

[Dieser Beitrag wurde am 15.05.2007 - 21:00 von VerGreeneyes aktualisiert]




guest ...
Real addict
.........

...

Status:Offline
Date registered: 30.07.2004
Post:856
Send Message
...   Created on 15.05.2007 - 21:15Jump to top Quote this post Report this post Edit Delete


I would try the color-reduced Scale2x variant - requires 4x5 = 20 total lookups.

It would be a nice scale and sharp enough for the more blurry "average weight filter".

Link to the shader.

PS:

The color reduction can, OTOH, be made much nicer in a manner that doesn't spoil the palette too much:

Code:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
 

uniform sampler2D OGL2Texture;

// Colour-reduction pass: quantises each RGB channel in square-root space and
// squares the result again, so the steps are spaced more finely for dark
// values than for bright ones.
void main()
{
    vec4 texel = texture2D(OGL2Texture, gl_TexCoord[0].xy);

    // Snap sqrt(channel) down to a multiple of 1/8, then undo the sqrt.
    vec3 c11 = floor(8.0*sqrt(texel.xyz))*0.125;
    c11 *= c11;

    // FIX: write all four components. Assigning only gl_FragColor.xyz left the
    // alpha component undefined; the source texel's alpha is passed through.
    gl_FragColor = vec4(c11, texel.w);
}


[Dieser Beitrag wurde am 15.05.2007 - 22:50 von guest aktualisiert]




More : [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]

Similar threads:
Topics Created by Replies Boardname
Cartoon GLSL shader guest 7 pete_bernert
Is there a OGL2 2xSaI Shader out there? GreenImp 0 pete_bernert
Scale2x Plus GLSL shader guest 0 pete_bernert
SuperEagle GLSL shader guest 0 pete_bernert
Super2xSaI GLSL shader guest 0 pete_bernert
Neuer Thread ...





Masthead

This forum is a free service of razyboard.com powered by:
Geizkragen Price Comparison. Top product in the price comparison: Krups Nespresso Essenza (XN2001)
Do you want a free forum in less than two minutes? Then click here!



Verwandte Suchbegriffe:
2xsai comparison | mat4 glsl | implementing scale2x plus glsl
blank