【问题标题】:How are PHP's built-in functions implemented internally?PHP 的内置函数是如何在内部实现的?
【发布时间】:2011-08-06 15:05:48
【问题描述】:

这些函数的编写方式是否与用户函数相同?我的意思是 PHP 代码和正则表达式之类的东西?

例如:

filter_var($email, FILTER_VALIDATE_EMAIL);

对比

http://www.totallyphp.co.uk/code/validate_an_email_address_using_regular_expressions.htm

【问题讨论】:

    标签: php function php-internals filter-var


    【解决方案1】:

    PHP 是用 C 语言编写的。PHP 函数是用高质量的 C 代码编写的,然后编译形成 PHP 语言库

    如果您想扩展 PHP(修改或编写自己的函数),请查看:http://www.php.net/~wez/extending-php.pdf

    编辑:

    给你:

    这是函数的原始 C 代码:

    /* {{{ proto mixed filter_var(mixed variable [, long filter [, mixed options]])
     * Returns the filtered version of the variable.
     *
     * Takes one required value plus an optional filter id and an optional
     * options argument. The value is copied into return_value and that copy
     * is handed to php_filter_call() with FILTER_REQUIRE_SCALAR as the
     * default flags.
     */
    PHP_FUNCTION(filter_var)
    {
        long filter = FILTER_DEFAULT; /* used when the caller passes no filter id */
        zval **filter_args = NULL, *data; /* filter_args stays NULL when no third argument is given */
    
        /* Bail out without filtering when the arguments cannot be parsed. */
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z/|lZ", &data, &filter, &filter_args) == FAILURE) {
            return;
        }
    
        /* A filter id rejected by PHP_FILTER_ID_EXISTS yields a false result. */
        if (!PHP_FILTER_ID_EXISTS(filter)) {
            RETURN_FALSE;
        }
    
        /* The filter operates on return_value, which is populated from data here
         * (presumably so the caller's variable is left untouched -- confirm
         * against the MAKE_COPY_ZVAL definition). */
        MAKE_COPY_ZVAL(&data, return_value);
    
        /* copy = 1; FILTER_REQUIRE_SCALAR is only the starting value for the
         * flags, php_filter_call() may replace it from filter_args. */
        php_filter_call(&return_value, filter, filter_args, 1, FILTER_REQUIRE_SCALAR TSRMLS_CC);
    }
    /* }}} */
    
    
    
    /*
     * Resolves the effective filter id, flags and options from filter_args and
     * then applies the filter to *filtered in place.
     *
     *   filtered     - value to filter; it is overwritten with the result.
     *   filter       - filter id; -1 means "take the id from filter_args".
     *   filter_args  - NULL, a non-array value read as a long, or an array
     *                  that may carry "filter", "flags" and "options" keys.
     *   copy         - when non-zero, *filtered is separated before it is
     *                  destroyed on the failure paths; also forwarded to the
     *                  filter helpers.
     *   filter_flags - initial flags; may be replaced from filter_args.
     *
     * Outcomes visible in this function:
     *   - array value while FILTER_REQUIRE_SCALAR is set  -> NULL or FALSE
     *   - array value otherwise                            -> php_zval_filter_recursive()
     *   - non-array value while FILTER_REQUIRE_ARRAY is set -> NULL or FALSE
     *   - anything else                                    -> php_zval_filter(), then
     *     wrapped in a one-element array if FILTER_FORCE_ARRAY is set.
     * NULL is chosen over FALSE when FILTER_NULL_ON_FAILURE is set.
     */
    static void php_filter_call(zval **filtered, long filter, zval **filter_args, const int copy, long filter_flags TSRMLS_DC) /* {{{ */
    {
        zval  *options = NULL;
        zval **option;
        char  *charset = NULL; /* never reassigned here; forwarded as NULL to the helpers */
    
        if (filter_args && Z_TYPE_PP(filter_args) != IS_ARRAY) {
            /* Non-array third argument: read it as a single long. */
            long lval;
    
            PHP_FILTER_GET_LONG_OPT(filter_args, lval);
    
            if (filter != -1) { /* handler for array apply */
                /* filter_args is the filter_flags */
                filter_flags = lval;
    
                /* Default to scalar-only unless the caller asked for arrays. */
                if (!(filter_flags & FILTER_REQUIRE_ARRAY ||  filter_flags & FILTER_FORCE_ARRAY)) {
                    filter_flags |= FILTER_REQUIRE_SCALAR;
                }
            } else {
                /* No filter id was supplied, so the long is the filter id. */
                filter = lval;
            }
        } else if (filter_args) {
            /* Array third argument: each recognised key is optional. The key
             * length passed is sizeof of the literal, i.e. it counts the
             * terminating NUL. */
            if (zend_hash_find(HASH_OF(*filter_args), "filter", sizeof("filter"), (void **)&option) == SUCCESS) {
                PHP_FILTER_GET_LONG_OPT(option, filter);
            }
    
            if (zend_hash_find(HASH_OF(*filter_args), "flags", sizeof("flags"), (void **)&option) == SUCCESS) {
                PHP_FILTER_GET_LONG_OPT(option, filter_flags);
    
                /* Same scalar-only default as in the non-array branch above. */
                if (!(filter_flags & FILTER_REQUIRE_ARRAY ||  filter_flags & FILTER_FORCE_ARRAY)) {
                    filter_flags |= FILTER_REQUIRE_SCALAR;
                }
            }
    
            if (zend_hash_find(HASH_OF(*filter_args), "options", sizeof("options"), (void **)&option) == SUCCESS) {
                if (filter != FILTER_CALLBACK) {
                    /* Ordinary filters only accept an array of options;
                     * anything else is silently ignored. */
                    if (Z_TYPE_PP(option) == IS_ARRAY) {
                        options = *option;
                    }
                } else {
                    /* FILTER_CALLBACK takes "options" of any type and runs
                     * with all flags cleared. */
                    options = *option;
                    filter_flags = 0;
                }
            }
        }
    
        if (Z_TYPE_PP(filtered) == IS_ARRAY) {
            if (filter_flags & FILTER_REQUIRE_SCALAR) {
                /* Array given where a scalar is required: replace the value
                 * with the failure result. */
                if (copy) {
                    SEPARATE_ZVAL(filtered);
                }
                zval_dtor(*filtered);
                if (filter_flags & FILTER_NULL_ON_FAILURE) {
                    ZVAL_NULL(*filtered);
                } else {
                    ZVAL_FALSE(*filtered);
                }
                return;
            }
            /* Arrays are allowed: delegate to the recursive helper. */
            php_zval_filter_recursive(filtered, filter, filter_flags, options, charset, copy TSRMLS_CC);
            return;
        }
        /* From here on *filtered is not an array. */
        if (filter_flags & FILTER_REQUIRE_ARRAY) {
            /* Non-array given where an array is required: same failure result
             * as above. */
            if (copy) {
                SEPARATE_ZVAL(filtered);
            }
            zval_dtor(*filtered);
            if (filter_flags & FILTER_NULL_ON_FAILURE) {
                ZVAL_NULL(*filtered);
            } else {
                ZVAL_FALSE(*filtered);
            }
            return;
        }
    
        php_zval_filter(filtered, filter, filter_flags, options, charset, copy TSRMLS_CC);
        if (filter_flags & FILTER_FORCE_ARRAY) {
            /* Wrap the filtered value: copy it into tmp, turn *filtered into
             * a fresh array and append tmp as its only element. */
            zval *tmp;
    
            ALLOC_ZVAL(tmp);
            MAKE_COPY_ZVAL(filtered, tmp);
    
            zval_dtor(*filtered);
    
            array_init(*filtered);
            add_next_index_zval(*filtered, tmp);
        }
    }
    

    这是验证电子邮件地址的例程,它正好回答了你的问题:是的,内部确实是通过正则表达式完成的。

    /*
     * Validates that value holds an e-mail address by matching it against a
     * single PCRE pattern.
     *
     * The parameter list comes from PHP_INPUT_FILTER_PARAM_DECL; `value` is
     * used below without a local declaration, so it is presumably one of the
     * parameters that macro declares -- confirm against its definition.
     *
     * Failure is signalled through RETURN_VALIDATION_FAILED when the input is
     * longer than 320 bytes, when the compiled pattern cannot be obtained, or
     * when pcre_exec() returns a negative value. On a match the function
     * simply falls off the end and does not modify value here.
     */
    void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
    {
        /*
         * The regex below is based on a regex by Michael Rushton.
         * However, it is not identical.  I changed it to only consider routeable
         * addresses as valid.  Michael's regex considers a@b a valid address
         * which conflicts with section 2.3.5 of RFC 5321 which states that:
         *
         *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
         *   when domain names are used in SMTP.  In other words, names that can
         *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
         *   in Section 5) are permitted, as are CNAME RRs whose targets can be
         *   resolved, in turn, to MX or address RRs.  Local nicknames or
         *   unqualified names MUST NOT be used.
         *
         * This regex does not handle comments and folding whitespace.  While
         * this is technically valid in an email address, these parts aren't
         * actually part of the address itself.
         *
         * Michael's regex carries this copyright:
         *
         * Copyright © Michael Rushton 2009-10
         * http://squiloople.com/
         * Feel free to use and redistribute this code. But please keep this copyright notice.
         *
         */
        const char regexp[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
    
        pcre       *re = NULL;
        pcre_extra *pcre_extra = NULL; /* filled by pcre_get_compiled_regex(), not used afterwards */
        int preg_options = 0;
        int         ovector[150]; /* Needs to be a multiple of 3 */
        int         matches;
    
    
        /* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
        if (Z_STRLEN_P(value) > 320) {
            RETURN_VALIDATION_FAILED
        }
    
        /* Obtain the compiled form of the pattern; a NULL result is treated as
         * a validation failure rather than as a separate error. */
        re = pcre_get_compiled_regex((char *)regexp, &pcre_extra, &preg_options TSRMLS_CC);
        if (!re) {
            RETURN_VALIDATION_FAILED
        }
        /* NOTE(review): ovector has 150 slots but only 3 are offered to
         * pcre_exec(), and NULL is passed where pcre_extra could be. */
        matches = pcre_exec(re, NULL, Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, ovector, 3);
    
        /* 0 means that the vector is too small to hold all the captured substring offsets */
        /* Only negative results are rejected, so a return of 0 is accepted
         * just like a positive match count. */
        if (matches < 0) {
            RETURN_VALIDATION_FAILED
        }
    
    }
    /* }}} */
    

    【讨论】:

    • 那个正则表达式字符串很大。 php版本不是更快吗?
    【解决方案2】:

    PHP 函数是:

    • 用 C 语言而不是 PHP 编写
    • 或者只是其他库提供的函数的包装器(例如,PHP 的 curl 扩展只是 curl 库的包装器)


    如果你好奇,可以看看 PHP 的源代码——这里是它的 SVN:http://svn.php.net/viewvc/

    例如,filter_var() 函数应该在 filter extension 的源代码中的某处定义。

    【讨论】:

      【解决方案3】:

      不。PHP 内部函数是用 C 编写的,而不是用 PHP 代码编写的。由于使用了大量 Zend 运行时宏,再加上参数从 PHP 传递到 C 结构的方式,这些代码看起来相当笨拙。

      那个特定的函数确实使用了正则表达式。它也是一个很好的例子:
      http://svn.php.net/repository/php/php-src/branches/PHP_5_3/ext/filter/logical_filters.c
      在该文件中间某处查找 regexp[]

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 2023-02-16
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2011-04-26
        相关资源
        最近更新 更多