存在一个需求:将数据库某一字段中的年月日取出来,遍查函数,只有hive中的REGEXP_EXTRACT可以满足,但是其需要借助于map-reduce,可不可以写一个类似的接近这个函数的方法呢?下面写了一个函数,结果出来了,但是可能不太理想。或者读者直接用内存处理就行了,能不用就不用,这里只做探索。

图:执行结果

mysql如何实现hive中的REGEXP_EXTRACT()

函数代码:

CREATE FUNCTION REGEXP_EXTRACT(Varstring varchar(5000))
RETURNS varchar(3000)
BEGIN
DECLARE v_length INT DEFAULT 0;


DECLARE v_Tmp TEXT default '';

DECLARE r_varString TEXT;
DECLARE r_length INT DEFAULT 0;
DECLARE r_Tmp TEXT default '';
DECLARE v_mid TEXT default '';
DECLARE v_mid2 TEXT default '';
DECLARE v_mid_1 TEXT default '';

DECLARE s_varString TEXT;
DECLARE s_length INT DEFAULT 0;
DECLARE s_Tmp TEXT default '';
DECLARE s_mid TEXT default '';

set v_length=CHAR_LENGTH(Varstring);
WHILE v_length > 0 DO
SET v_mid = mid(Varstring,v_length,1);
SET v_mid2 = mid(Varstring,v_length,2);
SET v_mid_1 = mid(Varstring,v_length - 1,1);
IF (
    (
        ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '月')
         OR
        ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '日')
         OR
        ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '时')
         OR
        ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '分')
         OR
        ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '秒')
         OR
        ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '年')
    ) 
    OR (v_mid_1 = '月' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58)) 
    OR (v_mid_1 = '日' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
    OR (v_mid_1 = '时' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
    OR (v_mid_1 = '分' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
    OR (v_mid_1 = '秒')
    OR (v_mid_1 = '年' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
    
    OR v_mid = '月'
    OR v_mid = '日'
    OR v_mid = '时'
    OR v_mid = '分'
    OR v_mid = '秒'
    OR v_mid = '年'

    OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58))

    OR v_mid = ','
    OR v_mid = '。'
    OR v_mid = ';'
    OR v_mid = '('
    OR v_mid = ')'
    OR v_mid = ':'
    OR v_mid = '“'
    OR v_mid = '”'
) THEN
set v_Tmp=concat(v_Tmp,v_mid);
ELSE SET v_Tmp = concat(v_Tmp,'|');
END IF;
SET v_length = v_length - 1;
END WHILE;

SET r_varString = REVERSE(v_Tmp);
set r_length=CHAR_LENGTH(r_varString);
WHILE r_length > 0 DO
IF (
    ((mid(r_varString,r_length,1) = '日') OR (mid(r_varString,r_length,1) = '分')) AND ((mid(r_varString,r_length,2) = '日|') OR (mid(r_varString,r_length,2) = '分|')) 
) THEN
SET r_Tmp = concat(r_Tmp,',');
SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1));
ELSEIF (mid(r_varString,r_length,1) = '|')
THEN SET r_Tmp = concat(r_Tmp,'');
ELSE SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1));
END IF;
SET r_length = r_length - 1;
END WHILE;

SET s_varString = REVERSE(r_Tmp);
set s_length=CHAR_LENGTH(s_varString);
WHILE s_length > 0 DO
SET s_mid = mid(s_varString,s_length,1);
IF (
    s_mid = ','
    OR s_mid = '。'
    OR s_mid = ';'
    OR s_mid = '('
    OR s_mid = ')'
    OR s_mid = ':'
    OR s_mid = '“'
    OR s_mid = '”'
    OR s_mid = ','
) THEN
SET s_Tmp = concat(s_Tmp,'|');
ELSE SET s_Tmp = concat(s_Tmp,s_mid);
END IF;
SET s_length = s_length - 1;
END WHILE;

RETURN REVERSE(s_Tmp);

END;

相关文章:

  • 2021-12-05
  • 2022-12-23
  • 2022-03-09
  • 2021-07-20
  • 2021-06-15
  • 2021-03-30
  • 2022-12-23
猜你喜欢
  • 2021-06-04
  • 2022-12-23
  • 2021-11-26
  • 2022-12-23
  • 2021-06-04
  • 2022-12-23
  • 2022-12-23
相关资源
相似解决方案