Received: from watserv1.uwaterloo.ca (watserv1.waterloo.edu) by karazm.math.UH.EDU with SMTP id AA13239 (5.65c/IDA-1.4.4 for ); Wed, 30 Oct 1991 23:15:59 -0600
Received: by watserv1.uwaterloo.ca id ; Thu, 31 Oct 91 00:11:25 -0500
Date: Thu, 31 Oct 91 00:11:25 -0500
From: Dave Stampe-Psy+Eng
Message-Id: <9110310511.AA19267@watserv1.uwaterloo.ca>
To: glove-list@karazm.math.uh.edu

I've had quite a few requests to look at the fast VGA poly blitter code. It's not done by any means, but this is what I have so far.

You'll notice that poly timing is done by subtracting a dummy call time from that of the poly drawing call: this gives a better estimate of the poly code speed without the C call, procedure and test parameter generation time. Obviously a general poly blitter with clipping will run a bit slower because of added interface code, but right now fine timing is critical, as this part of the blitter is called many times.

Timing as of now (on my Paradise VGA card, a pretty slow one, and a 486/25) is 6400 24x24 triangles or 4800 24x24 trapezoids per second. Thus, trapezoids are about 50% faster per pixel than the triangles.

The code is compiled with Borland C++ or Turbo C++ (others may need rewrites). Note the inline assembler: this will be moved to a separate .asm file in the future, but this style seems to work well for development.

Please contact me if you have any questions. More later.
--------------------- fpoly.c -------------------------- #pragma inline #include #include #include #include #include union REGS regs; #define PUT 0 /* defines of write modes */ #define AND 1 #define OR 2 #define XOR 3 int gdriver = VGA; int gmode = VGAHI; #define VGA 0x3CE /* VGA controller port address */ int vmode = 0x0d; /* 320x200x16 colors */ unsigned char stmask[320]; /* start, end mask fast lookup arrays */ unsigned char fnmask[320]; unsigned char smask[] = { 0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01 }; unsigned char emask[] = { 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF }; make_data() /* fill mask arrays */ { /* wd. be code segment tables in assembler */ int i,j; for(i=0;i<320;i++) { stmask[i] = smask[i&7]; fnmask[i] = emask[i&7]; } } main() { long btime; float mtime; int i,j,k; initgraph(&gdriver,&gmode,""); regs.h.ah = 0; /* set video mode */ regs.h.al = vmode; /* as driver doesn't sppt 320x200 */ int86(0x10,®s,®s); make_data(); /* create dummy asm tables */ btime = biostime(0,0L); /* dummy timer to find interface time */ for(i=0;i<290;i++) for(k=0;k<170;k++) dpoly(i+20, i+20, i, i+24, k, 24+k, (i+k)%16); mtime = (float)(biostime(0,0L)-btime)/18.2; setup_hdwe(PUT); /* setup VGA hardware */ btime = biostime(0,0L); /* draw 49300 24x24 triangles */ for(i=0;i<290;i++) /* of 288 pixels ea. 
*/ for(k=0;k<170;k++) trpoly(i+20, i+20, i, i+24, k, 24+k, (i+k)%16); reset_hdwe(); /* reset VGA hardware */ printf("Triangle blits: %f\n", (float)(biostime(0,0L)-btime)/18.2-mtime); setup_hdwe(PUT); btime = biostime(0,0L); /* draw 49300 24x24 trapezoids */ for(i=0;i<290;i++) /* of 576 pixels each */ for(k=0;k<170;k++) trpoly(i+7, i+30, i, i+25, k, 24+k, (i+k)%16); reset_hdwe(); printf("Trapezoidal blits: %f\n", (float)(biostime(0,0L)-btime)/18.2-mtime); getch(); textmode(-1); } setup_hdwe(int mode) /* set VGA to draw in desired mode */ { /* do ONCE for all polys */ asm { mov dx,VGA mov ah,BYTE PTR mode sal ah,1 sal ah,1 sal ah,1 mov al,03h /* set mode */ out dx,ax /* assumed PUT by BIOS */ mov ax,0B05h /* write mode 3, read mode 1 */ out dx,ax mov ax,0007h /* 0 to CDC for 0xFF read */ out dx,ax mov ax,0FF08h /* bit mask = all */ out dx,ax /* assumed 0xFF by BIOS */ mov ax,0FF01h /* ESR = 0x0F */ out dx,ax } } reset_hdwe() /* reset VGA to expected state after drawing */ { asm { mov dx,VGA mov ax,0000 out dx,ax mov ax,0001 out dx,ax mov ax,0003 out dx,ax mov ax,0005 out dx,ax } } /* 1 2 */ /* draw trapezoid: horizontal top, bottom */ /* */ /* do it as simply as possible: stack these to get */ /* 3 4 */ /* any 2 or 3-sided poly: quad is 50% faster per */ /* pixel for 24x24 than triangles */ /* just make 2 points the same for triangle draw. 
*/ trpoly(int x1,int x2, int x3, int x4, int y1, int y3, int color) { unsigned int vline = y1*40; /* video line: offset in buffer */ long l_incr, r_incr; /* side slopes (16-bit underflow */ int lines = y3-y1; /* line counter */ if(lines<1)return; asm { .386 mov dx,VGA xor al,al mov ah,BYTE PTR color /* set color */ out dx,ax cld mov ax,0a000h /* set segment */ mov es,ax } asm { xor ecx,ecx /* compute left incrementer */ mov ax,x3 sub ax,x1 cwd movsx eax,ax movsx edx,dx /* (x3-x1)/(y3-y1) */ shl eax,16 mov cx,lines idiv ecx cmp eax,0 /* round up if + ( - already done) */ jle rnd1 inc eax } rnd1: asm { mov l_incr,eax mov ax,x4 /* compute right incrementer */ sub ax,x2 cwd movsx eax,ax /* (x4-x2)/(y4-y2) */ movsx edx,dx shl eax,16 mov cx,lines idiv ecx cmp eax,0 /* round up */ jle rnd2 inc eax } rnd2: asm { mov r_incr,eax mov dx,x1 /* set start of left/right */ mov si,x2 shl edx,16 /* add zero frac. part */ shl esi,16 add edx,08000h /* add 0.5 to left, so it rounds up */ mov bx,x1 /* faster to load reg's than to shift */ mov cx,x2 } nextline: /* bx=left side, cx=right side, vline=line start */ asm { mov al,[bx+stmask] /* compute left side */ shr bx,3 mov di,cx /* compute right side */ mov ah,[di+fnmask] /* lookup 350 nS faster than shift */ shr cx,3 mov di,vline add di,bx /* compute start byte */ sub cx,bx /* number of bytes - 1 */ jz onebyte jc doneline /* skip if L>R */ and es:[di],al /* mask start byte */ inc di dec cx /* cx==0 test not worth it: */ mov al,0ffh /* faster to let REP handle 0's */ rep stosb /* fill center bytes */ and es:[di],ah /* mask end byte */ } goto doneline; onebyte: asm { and al,ah /* only 1 byte to mask */ and es:[di],al /* combine start, end mask */ } doneline: asm { dec WORD PTR lines jz donetri mov ax,40 add vline,ax add edx,l_incr /* add in slope */ add esi,r_incr mov ebx,edx /* throw away fraction: lt rounded up */ sar ebx,16 mov ecx,esi sar ecx,16 cmp cx,0 /* clip to 0 on left: */ jge nextline /* code auto-clip rt to 0 */ xor cx,cx jmp 
nextline } donetri: ; } dpoly(int x1,int x2, int x3, int x4, int y1, int y3, int color) { unsigned int vline = y1*40; long l_incr, r_incr; int lines = y3-y1; if(lines<1)return; asm { .386 mov dx,VGA xor al,al mov ah,BYTE PTR color /* set color */ out dx,ax cld mov ax,0a000h /* set segment */ mov es,ax } } ---------------------- ends ----------------------- -------------------------------------------------------------------------- | My life is Hardware, | | | my destiny is Software, | Dave Stampe | | my CPU is Wetware... | | | Anybody got a SDB I can borrow? | dstamp@watserv1.uwaterloo.ca | __________________________________________________________________________