DOCUMENTACIÓN AQUYNZA DEL DESARROLLADOR

00001 //===========================================================================
00002 //= parsero.cc                                                Julio de 1998 =
00003 //=-------------------------------------------------------------------------=
00004 //= Analizador sintactico de proposito general en el alcance del proyecto   =
00005 //=-------------------------------------------------------------------------=
00006 //= ADVERTENCIA: ESTE SOFTWARE NO ESTA CONCEBIDO NI DISENNADO PARA EL USO   =
00007 //= EN EQUIPO DE CONTROL EN LINEA EN ENTORNOS PELIGROSOS QUE REQUIERAN UN   =
00008 //= DESEMPENNO LIBRE DE FALLAS, COMO LA OPERACION DE PLANTAS NUCLEARES,     = 
00009 //= SISTEMAS DE NAVEGACION O COMUNICACION EN AVIONES, TRAFICO AEREO,        =
00010 //= EQUIPO MEDICO DEL CUAL DEPENDAN VIDAS HUMANAS O SISTEMAS DE ARMAMENTO,  =
00011 //= EN LOS CUALES UNA FALLA EN EL SOFTWARE PUEDA IMPLICAR DIRECTAMENTE LA   =
00012 //= MUERTE, DANNOS PERSONALES O DANNOS FISICOS Y/O AMBIENTALES GRAVES       =
00013 //= ("ACTIVIDADES DE ALTO RIESGO").                                         =
00014 //=-------------------------------------------------------------------------=
00015 //= Autor original: Oscar J. Chavarro G.  A.K.A. JEDILINK. Copyright (c),   =
00016 //= 1997 - 2003, oscarchavarro@hotmail.com                                  =
00017 //= AQUYNZA es software libre, y se rige bajo los terminos de la licencia   =
00018 //= LGPL de GNU (http://www.gnu.org). Para mayor informacion respecto a la  =
00019 //= licencia de uso, consulte el archivo ./doc/LICENCIA en la distribucion. =
00020 //===========================================================================
00021 
00022 #include "jed_defs.h"  // Incluir antes que nada, en ese modulo se definen 
00023                        // aspectos importantes para la portabilidad del sistema
00024 
00025 #include <ctype.h>
00026 #include <stdio.h>
00027 #include <string.h>
00028 #include <stdlib.h>
00029 #include "toolkits/util/parsero.h"
00030 #include "lista.cc"
00031 
// Size, in bytes, of the shared line buffer declared below (also used to size
// the local copy made by TOKENIZADOR::siguiente_token).
00032 #define MAX_BUFFER 3000
// File-wide scratch buffer holding the line currently being processed.
// PARSERO::leer_linea reads at most MAX_BUFFER/2 bytes into it; presumably
// the other half is headroom for the prepended remainder and for the blanks
// added by inserte_espacios -- NOTE(review): nothing enforces that limit.
00033 char THE_buffer[MAX_BUFFER];
00034 
00035 //===========================================================================
00036 //= Funciones auxiliares                                                    =
00037 //===========================================================================
00038 
/**
 * Removes the first and the last character of `cad`, in place.
 *
 * PARSERO::leer_linea uses it to strip the delimiters that surround the file
 * name of an #include directive.
 *
 * A NULL pointer or an empty string is left untouched; a one-character
 * string becomes empty.
 *
 * @param cad  NUL-terminated, writable string (may be NULL).
 */
void
des_punte(char *cad)
{
    if ( !cad ) return;

    size_t tam = strlen(cad);

    // Nothing to strip; also avoids writing to cad[-1].
    if ( tam == 0 ) return;

    // Drop the last character...
    cad[tam - 1] = '\0';
    // ...and shift the rest (new terminator included) one place to the left.
    memmove(cad, cad + 1, tam - 1);
}
00050 
/**
 * Removes one trailing and one leading double quote from `cad`, in place.
 *
 * Each end is handled independently: a quote is removed only where it is
 * actually present, so unquoted and half-quoted strings are accepted.
 * A NULL pointer or an empty string is left untouched.
 *
 * @param cad  NUL-terminated, writable string (may be NULL).
 */
void
des_comille(char *cad)
{
    if ( !cad ) return;

    size_t tam = strlen(cad);

    // Guard against reading cad[-1] on an empty string.
    if ( tam == 0 ) return;

    if ( cad[tam - 1] == '\"' ) {
        cad[tam - 1] = '\0';
        tam--;
    }

    if ( cad[0] == '\"' ) {
        // tam >= 1 here; moving tam bytes copies the remaining characters
        // plus the terminator.
        memmove(cad, cad + 1, tam);
    }
}
00066 
/**
 * Shortens the textual form of a real number, in place, by removing the
 * trailing zeros of its fractional part; the decimal point is removed too
 * when no fractional digit is left ("1.500" -> "1.5", "2.000" -> "2").
 *
 * Strings without a decimal point are not modified. Strings that carry an
 * exponent ('e' / 'E' after the point) are not modified either, because the
 * trailing zeros then belong to the exponent and removing them would change
 * the value. If stripping leaves no digit before the point (".0", "-.0") a
 * single '0' is written so the result is still a number ("0", "-0").
 *
 * @param cad  NUL-terminated, writable string (may be NULL).
 */
void
simplifique_real(char *cad)
{
    if ( !cad ) return;

    char *punto = strchr(cad, '.');
    if ( !punto ) return;

    // Scientific notation: the trailing zeros are exponent digits.
    if ( strpbrk(punto, "eE") ) return;

    // Walk back over the trailing zeros; the '.' stops the scan at the
    // latest, so i never goes below 0 inside this loop.
    int i = (int)strlen(cad) - 1;
    while ( i >= 0 && cad[i] == '0' ) i--;

    int quito_punto = 0;
    if ( i >= 0 && cad[i] == '.' ) {
        i--;
        quito_punto = 1;
    }
    cad[i + 1] = '\0';

    // Nothing but an optional sign is left: keep a zero as integer part.
    // The result is never longer than the original text, so it still fits.
    if ( quito_punto && (i < 0 || cad[i] == '-' || cad[i] == '+') ) {
        cad[i + 1] = '0';
        cad[i + 2] = '\0';
    }
}
00083 
/**
 * Surrounds the character at position `i` of `cad` with one blank on each
 * side, in place: "a{b" with i == 1 becomes "a { b".
 *
 * The string grows by two characters; the caller must make sure the buffer
 * has room for them, no capacity check is done here.
 *
 * @param cad  NUL-terminated, writable string.
 * @param i    Index of the character to isolate (0 <= i <= strlen(cad)).
 */
void
inserte_espacios(char *cad, int i)
{
    const int largo = (int)strlen(cad);

    // Move everything that follows cad[i] (terminator included) two places
    // to the right, opening a gap of two characters after position i.
    memmove(cad + i + 3, cad + i + 1, (size_t)(largo - i));

    // Fill the gap: blank, original character, blank.
    cad[i + 2] = ' ';
    cad[i + 1] = cad[i];
    cad[i] = ' ';
}
00105 
00106 //===========================================================================
00107 //= CLASE PARSERO                                                           =
00108 //===========================================================================
00109 
00110 PARSERO::PARSERO()
00111 {
00112     nivel_comentario = 0;
00113     nivel_bloque = 0;
00114     primer_pasada = TRUE;
00115     Remanente = NULL;
00116     index_fd = 0;
00117 }
00118 
00119 PARSERO::~PARSERO()
00120 {
00121     int i;
00122 
00123     for ( i = 0; i < fds.tam(); i++ ) {
00124         fclose(fds[i]);
00125     }
00126     fds.elim();
00127     if ( Remanente ) delete Remanente;
00128 }
00129 
00130 BOOLEAN
00131 PARSERO::leer_linea(void)
00132 {
00133     char *ptr, *qtr;
00134     FILE *fd;
00135 
00136     if ( !fgets(THE_buffer, MAX_BUFFER/2, fds[index_fd]) ) {
00137         fclose(fds[index_fd]);
00138         fds.elimElem(index_fd);
00139         index_fd--;
00140         if ( !fds.tam() ) return FALSE;
00141         else return leer_linea();
00142     }
00143     ptr = strstr(THE_buffer, "#include");
00144     if ( ptr ) {
00145         qtr = strtok(ptr, " \t\n");
00146         qtr = strtok(NULL, " \t\n");
00147         des_punte(qtr);
00148         fd = fopen(qtr, "rt");
00149         if ( !fd ) {
00150             fprintf(stderr, "<PARSERO> ERROR: No encuentro el archivo "
00151               "\"%s\"\n    referenciado en un #include!\n", qtr);
00152             fflush(stderr);
00153             //return FALSE;
00154             exit(1);
00155         }
00156         fds.anx(fd);
00157         index_fd++;
00158         return leer_linea();
00159     }
00160     return TRUE;
00161 }
00162 
00163 BOOLEAN
00164 PARSERO::init(char *nombre_archivo)
00167 {
00168     FILE *fd = fopen(nombre_archivo, "rt");
00169 
00170     if ( !fd ) {
00171         //fprintf(stderr, "<PARSERO> ERROR: No se puede abrir el archivo "
00172         //    "\"%s\"\n", nombre_archivo);
00173         //fflush(stderr);
00174         return FALSE;
00175     }
00176 
00177     fds.anx(fd);
00178 
00179     return TRUE;
00180 }
00181 
00182 void
00183 PARSERO::colapse_espacios(char *linea)
00184 {
00185     int i, j;
00186 
00187     // Evite espacios seguidos
00188     for ( i = 0, j = 0; linea[i] != '\0'; i++, j++ ) {
00189         if ( isspace(linea[i]) ) linea[i] = ' ';
00190         linea[j] = linea[i];
00191         if ( isspace(linea[i]) && isspace(linea[i+1]) ) j--;
00192     }
00193     linea[j] = '\0';
00194 
00195     // Quite el primero si es espacio
00196     if ( isspace(linea[0]) ) {
00197         for ( i = 0; linea[i+1] != '\0'; i++ ) linea[i] = linea[i+1];
00198         linea[i] = '\0';
00199     }
00200 }
00201 
00202 
00203 BOOLEAN
00204 PARSERO::preprocese_linea(char *linea)
00213 {
00214     BOOLEAN esta_vacia = TRUE;
00215     int i = 0;
00216 
00217     //- Procesamiento de comentarios tipo C --------------------------------
00218     for ( i = 0; nivel_comentario == 0 && linea[i] != '\0'; i++ ) {
00219         if ( linea[i] == '/' && linea[i+1] == '*' ) {
00220             nivel_comentario++;
00221             linea[i] = linea[i+1] = ' ';
00222         }
00223     }
00224 
00225     if ( nivel_comentario > 0 ) {
00226         while ( linea[i] != '\0' ) {
00227             if ( linea[i] == '/' && linea[i+1] == '*' ) {
00228                 nivel_comentario++;
00229                 linea[i] = linea[i+1] = ' ';
00230               }
00231               else if ( linea[i] == '*' && linea[i+1] == '/' ) {
00232                 nivel_comentario--;
00233                 linea[i] = linea[i+1] = ' ';
00234               }
00235               else {
00236                 if ( nivel_comentario > 0 ) linea[i] = ' ';
00237             }
00238             i++;
00239         }
00240     }
00241 
00242     //- Procesamiento de comentarios tipo C++ ------------------------------
00243     if ( nivel_comentario == 0 ) {
00244         // Los comentarios de linea solo se dan si no estan anidados.  De
00245         // otra manera el que se tiene en cuenta es el comentario que lo
00246         // contiene.
00247         for ( i = 0; linea[i] != '\0'; i++ ) {
00248             if ( linea[i] == '/' && linea[i+1] == '/' ) {
00249                 linea[i] = '\n'; i++;
00250                 linea[i] = '\0'; i++;
00251                 while ( linea[i] != '\0' ) {
00252                     linea[i] = ' ';
00253                     i++;
00254                 }
00255             }
00256         }
00257     }
00258 
00259     //- Mire si es una linea vacia------------------------------------------
00260     for ( i = 0; linea[i] != '\0'; i++ ) {
00261         if ( !isspace(linea[i]) ) {
00262             esta_vacia = FALSE;
00263             break;
00264         }
00265     }
00266     if ( esta_vacia ) return FALSE;
00267 
00268     //- Notese que varios separadores seguidos hacen un solo separador -----
00269     colapse_espacios(linea);
00270 
00271     if ( linea[strlen(linea)-1] == '\n' ) linea[strlen(linea)-1] = '\0';
00272 
00273     return TRUE;
00274 }
00275 
00276 char *
00277 PARSERO::procese_linea(char *linea)
00281 {
00282     int i;
00283     char *ptr = &linea[strlen(linea)];
00284 
00285     for ( i = 0; linea[i] != 0; i++ ) {
00286         if ( linea[i] == '=' ) {
00287             inserte_espacios(linea, i);
00288             i++;
00289         }
00290         if ( linea[i] == '{' ) {
00291             inserte_espacios(linea, i);
00292             nivel_bloque++;
00293             ptr = &(linea[i+2]);
00294             primer_pasada = FALSE;
00295             i++;
00296         }
00297         if ( linea[i] == '}' ) {
00298             inserte_espacios(linea, i);
00299             nivel_bloque--;
00300             ptr = &(linea[i+2]);
00301             primer_pasada = FALSE;
00302             i++;
00303         }
00304 #ifdef NONONO
00305         if ( linea[i] == '<' ) {
00306             // Quite los separadores que le siguen
00307             for ( j = k = i+1; linea[k] != '0'; j++, k++ ) {
00308                 while ( j == i+1 && isspace(linea[k]) ) k++; 
00309                 linea[j] = linea[k];
00310             }
00311         }
00312 #endif
00313     }
00314 
00315     return ptr;
00316 }
00317 
00318 int
00319 PARSERO::siguiente_segmento(LISTA<char*> **LIneas)
00350 {
00351     LISTA<char *> *Lineas;
00352     int i, j;
00353     char *cad, *ptr;
00354 
00355     primer_pasada = TRUE;
00356 
00357     if ( !fds.tam() ) return 0;
00358 
00359     //----------------------------------------------------------------------
00360     Lineas = new LISTA<char *>;
00361     if ( !Lineas ) {
00362         (*LIneas) = NULL;
00363         return -1;
00364     }
00365 
00366     for ( i = 0; leer_linea(); i++ ) {
00367         if ( Remanente ) {
00368             for ( j = strlen(THE_buffer); j >= 0; j-- ) {
00369                 THE_buffer[j+strlen(Remanente)] = THE_buffer[j];
00370             }
00371             for ( j = strlen(Remanente) - 1; j >= 0; j-- ) {
00372                 THE_buffer[j] = Remanente[j];
00373             }
00374             delete Remanente;
00375             Remanente = NULL;
00376         }
00377 
00378         if ( !preprocese_linea(THE_buffer) ) continue;
00379 
00380         ptr = procese_linea(THE_buffer);
00381         if ( !primer_pasada && nivel_bloque == 0 ) {
00382             // Acabo de terminar la vaina!
00383             if ( strlen(ptr) > 0 && !(strlen(ptr)==1 && ptr[0]==' ') ) {
00384                 Remanente = new char[strlen(ptr) + 1];
00385                 strcpy(Remanente, ptr);
00386             }
00387             ptr[0] = '\0';
00388         }
00389 
00390         //- Asigne espacio para la cadena
00391         cad = new char[strlen(THE_buffer) + 1];
00392         if ( !cad ) {
00393             for ( j = 0; j < Lineas->tam(); j++ ) {
00394                 delete (*Lineas)[j];
00395             }
00396             Lineas->elim();
00397             delete Lineas;
00398             (*LIneas) = NULL;
00399             return -1;
00400         }
00401         strcpy(cad, THE_buffer);
00402 
00403         //- Anexe la linea a la estructura preprocesada
00404         Lineas->anx(cad);
00405 
00406         //- Reporte de bloques cada que se detecta un "}" de primer nivel
00407         if ( !primer_pasada && nivel_bloque == 0 ) {
00408             (*LIneas) = Lineas;
00409             return 1;
00410         }
00411     }
00412 
00413     // OJO: AQUI HAY UN PROBLEMA:
00414     //      Si el archivo ya se acabo, pero quedo un remanente de antes,
00415     //      es importante reportar ese remanente!  (Para probar, procese
00416     //      un .h donde se defina una clase... vea que el ";" despues de
00417     //      el ultimo "}" se desaparece si el "};" resulta ser la ultima
00418     //      linea!
00419 
00420     //- Aqui se acaba el archivo -------------------------------------------
00421     (*LIneas) = Lineas;
00422     return 1;
00423 }
00424 
00425 //===========================================================================
00426 //= CLASE TOKENIZADOR                                                       =
00427 //===========================================================================
00428 
00429 TOKENIZADOR::TOKENIZADOR()
00430 {
00431     Lineas = NULL;
00432     strcpy(separadores, " \n\t,;:");
00433 }
00434 
00435 void
00436 TOKENIZADOR::cambiar_lista(LISTA<char *> *lst)
00437 {
00438     Lineas = lst;
00439 }
00440 
00441 BOOLEAN
00442 TOKENIZADOR::empiezo_con_cadena(char *cad)
00447 {
00448     int i, j;
00449     BOOLEAN bueno;
00450     char *ptr;
00451 
00452     for ( i = 0; cad[i]; i++ ) {
00453         if ( cad[i] == '\"' ) break;
00454         for ( bueno = FALSE, j = 0; separadores[j]; j++ ) {
00455             if ( cad[i] == separadores[j] ) bueno = TRUE;
00456         }
00457         if ( !bueno )return FALSE;
00458     }
00459 
00460     // Empiezo con cadena, me dispongo a quitar la basura de antes de la
00461     // cadena.
00462     ptr = strchr(cad, '\"');
00463     for ( i = 0; cad[i] && ptr; i++, ptr++ ) {
00464         cad[i] = (*ptr);
00465     }
00466     cad[i] = '\0';
00467     return TRUE;
00468 }
00469 
00470 int
00471 TOKENIZADOR::siguiente_token(char *cad)
00485 {
00486     char *ptr, *qtr, mi_cad[MAX_BUFFER];
00487     unsigned int i;
00488     BOOLEAN listo;
00489 
00490     if ( !Lineas || Lineas->tam() < 1 ) return TK_FIN;
00491 
00492     do {
00493         //- Aisle el siguiente token ---------------------------------------
00494         strcpy(mi_cad, (*Lineas)[0]);
00495         if ( empiezo_con_cadena(mi_cad) && strlen(mi_cad) > 0 ) {
00496             //- Separacion de un token de tipo cadena -
00497             for ( listo = FALSE, i = 1; mi_cad[i]; i++ ) {
00498                 if ( mi_cad[i] == '\"' ) {
00499                     mi_cad[i+1] = '\0';
00500                     listo = TRUE;
00501                     break;
00502                 }
00503             }
00504             if ( !listo ) {
00505                 // Quiere decir que no encontro el fin de cadena
00506                 fprintf(stderr, "<TOKENIZER> - ERROR DE SINTAXIS, cadena no "
00507                     "terminada en la siguiente linea:\n%s\n", (*Lineas)[0] );
00508                 fflush(stderr);
00509                 exit(1);
00510             }
00511             ptr = mi_cad;
00512           }
00513           else {
00514             //- Separacion de un token normal -
00515             ptr = strtok(mi_cad, separadores);
00516         }
00517 
00518         //- Avance ----------------------------------------------------------
00519         if ( !ptr || !strlen(ptr) ) {
00520             delete (*Lineas)[0];
00521             Lineas->elimElem(0);
00522           }
00523           else {
00524             //- Quitele el primer token a la cadena -
00525             qtr = strstr((*Lineas)[0], ptr);
00526             for ( i = 0; qtr && i < strlen(ptr); i++, qtr++ ) {
00527                 qtr[0] = ' ';
00528             }
00529             break;
00530         }
00531     }
00532     while( Lineas->tam() > 0 && (!ptr || !strlen(ptr)) );
00533 
00534     //- Determine el tipo de token adquirido ---------------------------------
00535     if ( ptr ) {
00536         strcpy(cad, ptr);
00537         //printf("[%s]\n", cad); fflush(stdout);
00538         if ( cad[0] == '\"' ) {
00539             return TK_CADENA;
00540           }
00541           else if ( cad[0] == '<' && isdigit(cad[1]) ||
00542                     cad[0] == '<' && isdigit(cad[2]) && cad[1] == '-' ) {
00543             return TK_VECTOR_INICIO;
00544           }
00545           else if ( cad[strlen(cad)-1]=='>' && isdigit(cad[strlen(cad)-2]) ) {
00546             return TK_VECTOR_FIN;
00547           }
00548           else if ( isdigit(cad[0]) ||
00549                     isdigit(cad[1]) && cad[0] == '-' ) {
00550             return TK_NUMERO;
00551           }
00552           else if ( cad[0] == '{' || cad[0] == '(' ) {
00553             return TK_ABRIR;
00554           }
00555           else if ( cad[0] == '}' || cad[0] == ')') {
00556             return TK_CERRAR;
00557           }
00558           else {
00559             return TK_IDENTIFICADOR;
00560         }
00561         ;
00562     }
00563 
00564     //- Borre la linea que acabo de procesar ---------------------------------
00565     cad[0] = '\0';
00566     if ( Lineas->tam() > 0 ) {
00567         delete (*Lineas)[0];
00568         Lineas->elimElem(0);
00569     }
00570     return TK_DESCONOCIDO;
00571 }
00572 
00573 
00574 //===========================================================================
00575 //= EOF                                                                     =
00576 //===========================================================================
00577
parsero.C