extract https://github.com/mattn/mruby-onig-regexp @ a491074 () at deps/mruby-onig-regexp

This commit is contained in:
Kazuho Oku 2017-03-06 09:49:36 +09:00
parent 306cd87410
commit ebe4db2570
6 changed files with 87 additions and 35 deletions

View File

@ -3,7 +3,6 @@ addons:
apt:
packages:
- gperf
- libonig-dev
script:
- export MRUBY_CONFIG="$TRAVIS_BUILD_DIR/.travis_config.rb"
- git clone --depth 1 "https://github.com/mruby/mruby.git"

Binary file not shown.

Binary file not shown.

View File

@ -15,15 +15,19 @@ MRuby::Gem::Specification.new('mruby-onig-regexp') do |spec|
linker.libraries = ['pthread']
end
version = '5.15.0'
oniguruma_dir = "#{build_dir}/onig-#{version}"
oniguruma_lib = libfile "#{oniguruma_dir}/.libs/libonig"
version = '6.1.1'
oniguruma_dir = "#{build_dir}/onigmo-#{version}"
oniguruma_lib = libfile "#{oniguruma_dir}/.libs/libonigmo"
unless ENV['OS'] == 'Windows_NT'
oniguruma_lib = libfile "#{oniguruma_dir}/.libs/libonig"
oniguruma_lib = libfile "#{oniguruma_dir}/.libs/libonigmo"
else
oniguruma_lib = libfile "#{oniguruma_dir}/onig_s"
if ENV['PROCESSOR_ARCHITECTURE'] == 'AMD64'
oniguruma_lib = libfile "#{oniguruma_dir}/build-x86-64/onigmo"
else
oniguruma_lib = libfile "#{oniguruma_dir}/build-i686/onigmo"
end
end
header = "#{oniguruma_dir}/oniguruma.h"
header = "#{oniguruma_dir}/onigmo.h"
task :clean do
FileUtils.rm_rf [oniguruma_dir]
@ -63,7 +67,7 @@ MRuby::Gem::Specification.new('mruby-onig-regexp') do |spec|
_pp 'autotools', oniguruma_dir
run_command e, './autogen.sh' if File.exists? 'autogen.sh'
run_command e, "./configure --disable-shared --enable-static --enable-multithread #{host}"
run_command e, "./configure --disable-shared --enable-static #{host}"
run_command e, 'make'
else
run_command e, 'cmd /c "copy /Y win32 > NUL"'
@ -94,10 +98,14 @@ MRuby::Gem::Specification.new('mruby-onig-regexp') do |spec|
file "#{dir}/src/mruby_onig_regexp.c" => oniguruma_lib
cc.include_paths << oniguruma_dir
cc.defines += ['HAVE_ONIGMO_H']
end
if build.cc.respond_to? :search_header_path and build.cc.search_header_path 'oniguruma.h'
if spec.respond_to? :search_package and spec.search_package 'onigmo'
spec.cc.defines += ['HAVE_ONIGMO_H']
elsif spec.respond_to? :search_package and spec.search_package 'oniguruma'
spec.cc.defines += ['HAVE_ONIGURUMA_H']
elsif build.cc.respond_to? :search_header_path and build.cc.search_header_path 'oniguruma.h'
spec.linker.libraries << 'onig'
else
spec.bundle_onigmo

View File

@ -35,7 +35,13 @@ THE SOFTWARE.
#ifdef _MSC_VER
#define ONIG_EXTERN extern
#endif
#ifdef HAVE_ONIGMO_H
#include <onigmo.h>
#elif defined(HAVE_ONIGURUMA_H)
#include <oniguruma.h>
#else
#include "oniguruma.h"
#endif
#ifdef MRUBY_VERSION
#define mrb_args_int mrb_int
@ -43,6 +49,32 @@ THE SOFTWARE.
#define mrb_args_int int
#endif
/*
 * Sequence length, in bytes, associated with each possible byte value.
 * utf8len() indexes this table with the first byte of a sequence.
 * Every entry is at least 1.
 */
static const char utf8len_codepage[256] =
{
/* 0x00-0x7f: length 1 */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
/* 0x80-0xbf: also length 1 (these values are the UTF-8 continuation bytes) */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
/* 0xc0-0xdf: length 2 */
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
/* 0xe0-0xef: length 3; 0xf0-0xf4: length 4; 0xf5-0xff: length 1 */
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
};
/*
 * Return the byte length of the UTF-8 sequence starting at p.
 *
 * p: first byte of the sequence.
 * e: one past the last byte that may be read.
 *
 * The expected length comes from utf8len_codepage, indexed by the first
 * byte. The function returns 1 instead when the range [p, e) is empty, when
 * the sequence would extend past e, or when a trailing byte is not a
 * continuation byte (bit pattern 10xxxxxx). The result is always >= 1.
 */
static mrb_int
utf8len(const char* p, const char* e)
{
  mrb_int len;
  mrb_int i;

  /* Empty range: return the same value as before, but without reading *p. */
  if (p >= e) return 1;

  len = utf8len_codepage[(unsigned char)*p];

  /* Compare against the bytes remaining rather than forming p + len,
   * which could point beyond the end of the buffer. */
  if (len > e - p) return 1;

  for (i = 1; i < len; ++i)
    if ((p[i] & 0xc0) != 0x80)
      return 1;
  return len;
}
static void
onig_regexp_free(mrb_state *mrb, void *p) {
onig_free((OnigRegex) p);
@ -68,7 +100,6 @@ onig_regexp_initialize(mrb_state *mrb, mrb_value self) {
mrb_get_args(mrb, "S|oo", &str, &flag, &code);
int cflag = 0;
OnigSyntaxType* syntax = ONIG_SYNTAX_RUBY;
OnigEncoding enc = ONIG_ENCODING_UTF8;
if(mrb_string_p(code)) {
char const* str_code = mrb_string_value_ptr(mrb, code);
@ -96,7 +127,7 @@ onig_regexp_initialize(mrb_state *mrb, mrb_value self) {
OnigErrorInfo einfo;
OnigRegex reg;
int result = onig_new(&reg, (OnigUChar*)RSTRING_PTR(str), (OnigUChar*) RSTRING_PTR(str) + RSTRING_LEN(str),
cflag, enc, syntax, &einfo);
cflag, enc, ONIG_SYNTAX_RUBY, &einfo);
if (result != ONIG_NORMAL) {
char err[ONIG_MAX_ERROR_MESSAGE_LEN] = "";
onig_error_code_to_str((OnigUChar*)err, result);
@ -325,52 +356,52 @@ onig_regexp_to_s(mrb_state *mrb, mrb_value self) {
again:
if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
int err = 1;
ptr += 2;
if ((len -= 2) > 0) {
int err = 1;
ptr += 2;
if ((len -= 2) > 0) {
do {
if(strchr(ptr, 'i')) { options |= ONIG_OPTION_IGNORECASE; }
if(strchr(ptr, 'x')) { options |= ONIG_OPTION_EXTEND; }
if(strchr(ptr, 'm')) { options |= ONIG_OPTION_MULTILINE; }
++ptr;
++ptr;
} while (--len > 0);
}
if (len > 1 && *ptr == '-') {
}
if (len > 1 && *ptr == '-') {
++ptr;
--len;
do {
if(strchr(ptr, 'i')) { options &= ~ONIG_OPTION_IGNORECASE; }
if(strchr(ptr, 'x')) { options &= ~ONIG_OPTION_EXTEND; }
if(strchr(ptr, 'm')) { options &= ~ONIG_OPTION_MULTILINE; }
++ptr;
++ptr;
} while (--len > 0);
}
if (*ptr == ')') {
}
if (*ptr == ')') {
--len;
++ptr;
goto again;
}
if (*ptr == ':' && ptr[len-1] == ')') {
}
if (*ptr == ':' && ptr[len-1] == ')') {
OnigRegex rp;
++ptr;
len -= 2;
err = onig_new(&rp, (OnigUChar*)ptr, (OnigUChar*)ptr + len, ONIG_OPTION_DEFAULT,
ONIG_ENCODING_UTF8, OnigDefaultSyntax, NULL);
onig_free(rp);
}
if (err) {
}
if (err) {
options = onig_get_options(reg);
ptr = RSTRING_PTR(src);
len = RSTRING_LEN(src);
}
}
}
if (*option_to_str(optbuf, options)) mrb_str_cat_cstr(mrb, str, optbuf);
if ((options & embeddable) != embeddable) {
optbuf[0] = '-';
option_to_str(optbuf + 1, ~options);
mrb_str_cat_cstr(mrb, str, optbuf);
optbuf[0] = '-';
option_to_str(optbuf + 1, ~options);
mrb_str_cat_cstr(mrb, str, optbuf);
}
mrb_str_cat_cstr(mrb, str, ":");
@ -618,11 +649,9 @@ append_replace_str(mrb_state* mrb, mrb_value result, mrb_value replace,
default:
if (isdigit(*ch)) { // group number 0-9
int const idx = *ch - '0';
if (idx >= match->num_regs) {
mrb_raisef(mrb, E_INDEX_ERROR, "undefined group number reference: %S (max: %S)",
mrb_fixnum_value(idx), mrb_fixnum_value(match->num_regs));
if (idx < match->num_regs) {
mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
}
mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
} else {
char const str[] = { '\\', *ch };
mrb_str_cat(mrb, result, str, 2);
@ -649,7 +678,7 @@ string_gsub(mrb_state* mrb, mrb_value self) {
}
if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "both block and replace expression must not be passed");
blk = mrb_nil_value();
}
OnigRegex reg;
@ -674,6 +703,18 @@ string_gsub(mrb_state* mrb, mrb_value self) {
}
last_end_pos = match->end[0];
if (match->beg[0] == match->end[0]) {
/*
* Always consume at least one character of the input string
* in order to prevent infinite loops.
*/
char* p = RSTRING_PTR(self) + last_end_pos;
char* e = p + RSTRING_LEN(self);
int len = utf8len(p, e);
if (RSTRING_LEN(self) < last_end_pos + len) break;
mrb_str_cat(mrb, result, p, len);
last_end_pos += len;
}
}
mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos);
@ -946,7 +987,9 @@ mrb_mruby_onig_regexp_gem_init(mrb_state* mrb) {
mrb_define_const(mrb, clazz, "CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_CAPTURE_GROUP));
mrb_define_const(mrb, clazz, "NOTBOL", mrb_fixnum_value(ONIG_OPTION_NOTBOL));
mrb_define_const(mrb, clazz, "NOTEOL", mrb_fixnum_value(ONIG_OPTION_NOTEOL));
#ifdef ONIG_OPTION_POSIX_REGION
mrb_define_const(mrb, clazz, "POSIX_REGION", mrb_fixnum_value(ONIG_OPTION_POSIX_REGION));
#endif
#ifdef ONIG_OPTION_ASCII_RANGE
mrb_define_const(mrb, clazz, "ASCII_RANGE", mrb_fixnum_value(ONIG_OPTION_ASCII_RANGE));
#endif

View File

@ -256,7 +256,9 @@ assert('String#onig_regexp_gsub') do
assert_equal 'h<e>ll<o> mr<u>by', test_str.onig_regexp_gsub(OnigRegexp.new('([aeiou])'), '<\1>')
assert_equal 'h e l l o m r u b y ', test_str.onig_regexp_gsub(OnigRegexp.new('\w')) { |v| v + ' ' }
assert_equal 'h{e}ll{o} mr{u}by', test_str.onig_regexp_gsub(OnigRegexp.new('(?<hoge>[aeiou])'), '{\k<hoge>}')
assert_raise(IndexError) { test_str.onig_regexp_gsub(OnigRegexp.new('(mruby)'), '<\2>') }
assert_equal '.h.e.l.l.o. .m.r.u.b.y.', test_str.onig_regexp_gsub(OnigRegexp.new(''), '.')
assert_equal " hello\n mruby", "hello\nmruby".onig_regexp_gsub(OnigRegexp.new('^'), ' ')
assert_equal "he<l><><l><>o mruby", test_str.onig_regexp_gsub(OnigRegexp.new('(l)'), '<\1><\2>')
end
assert('String#onig_regexp_scan') do