regexp.h
00001 // -*- c-basic-offset: 2 -*- 00002 /* 00003 * This file is part of the KDE libraries 00004 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with this library; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00019 * 00020 */ 00021 00022 #ifndef _KJS_REGEXP_H_ 00023 #define _KJS_REGEXP_H_ 00024 00025 #include <sys/types.h> 00026 00027 #include "config.h" 00028 00029 #ifdef HAVE_PCREPOSIX 00030 #include <pcre.h> 00031 #else // POSIX regex - not so good... 00032 extern "C" { // bug with some libc5 distributions 00033 #include <regex.h> 00034 } 00035 #endif //HAVE_PCREPOSIX 00036 00037 #include "ustring.h" 00038 00039 namespace KJS { 00040 00041 class RegExp { 00042 public: 00043 enum { None = 0, Global = 1, IgnoreCase = 2, Multiline = 4 }; 00044 RegExp(const UString &p, int f = None); 00045 ~RegExp(); 00046 int flags() const { return flgs; } 00047 UString pattern() const { return pat; } 00048 bool isValid() const { return valid; } 00049 UString match(const UString &s, int i, int *pos = 0, int **ovector = 0); 00050 // test is unused. The JS spec says that RegExp.test should use 00051 // RegExp.exec, so it has to store $1 etc. 00052 // bool test(const UString &s, int i = -1); 00053 unsigned int subPatterns() const { return nrSubPatterns; } 00054 00055 //These methods should be called around the match of the same string.. 00056 void prepareMatch(const UString &s); 00057 void doneMatch(); 00058 private: 00059 const UString pat; 00060 int flgs : 8; 00061 bool m_notEmpty; 00062 bool valid; 00063 00064 // Cached encoding info... 00065 char* buffer; 00066 int* originalPos; 00067 int bufferSize; 00068 00069 void prepareUtf8 (const UString& s); 00070 void prepareASCII (const UString& s); 00071 #ifndef NDEBUG 00072 UString originalS; // the original string, used for sanity-checking 00073 #endif 00074 00075 #ifndef HAVE_PCREPOSIX 00076 regex_t preg; 00077 #else 00078 pcre *pcregex; 00079 00080 enum UTF8SupportState { 00081 Unknown, 00082 Supported, 00083 Unsupported 00084 }; 00085 00086 #ifdef PCRE_CONFIG_UTF8 00087 static UTF8SupportState utf8Support; 00088 #endif 00089 #endif 00090 unsigned int nrSubPatterns; 00091 00092 RegExp(); 00093 }; 00094 00095 } // namespace 00096 00097 #endif