存在一个需求:将数据库某一字段中的年月日取出来,遍查函数,只有hive中的REGEXP_EXTRACT可以满足,但是其需要借助于map-reduce,可不可以写一个类似的接近这个函数的方法呢?下面写了一个函数,结果出来了,但是可能不太理想。或者读者直接用内存处理就行了,能不用就不用,这里只做探索。
图:执行结果
函数代码:
CREATE FUNCTION REGEXP_EXTRACT(Varstring varchar(5000))
RETURNS varchar(3000)
BEGIN
DECLARE v_length INT DEFAULT 0;
DECLARE v_Tmp TEXT default '';
DECLARE r_varString TEXT;
DECLARE r_length INT DEFAULT 0;
DECLARE r_Tmp TEXT default '';
DECLARE v_mid TEXT default '';
DECLARE v_mid2 TEXT default '';
DECLARE v_mid_1 TEXT default '';
DECLARE s_varString TEXT;
DECLARE s_length INT DEFAULT 0;
DECLARE s_Tmp TEXT default '';
DECLARE s_mid TEXT default '';
set v_length=CHAR_LENGTH(Varstring);
WHILE v_length > 0 DO
SET v_mid = mid(Varstring,v_length,1);
SET v_mid2 = mid(Varstring,v_length,2);
SET v_mid_1 = mid(Varstring,v_length - 1,1);
IF (
(
((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '月')
OR
((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '日')
OR
((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '时')
OR
((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '分')
OR
((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '秒')
OR
((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '年')
)
OR (v_mid_1 = '月' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
OR (v_mid_1 = '日' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
OR (v_mid_1 = '时' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
OR (v_mid_1 = '分' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
OR (v_mid_1 = '秒')
OR (v_mid_1 = '年' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))
OR v_mid = '月'
OR v_mid = '日'
OR v_mid = '时'
OR v_mid = '分'
OR v_mid = '秒'
OR v_mid = '年'
OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58))
OR v_mid = ','
OR v_mid = '。'
OR v_mid = ';'
OR v_mid = '('
OR v_mid = ')'
OR v_mid = ':'
OR v_mid = '“'
OR v_mid = '”'
) THEN
set v_Tmp=concat(v_Tmp,v_mid);
ELSE SET v_Tmp = concat(v_Tmp,'|');
END IF;
SET v_length = v_length - 1;
END WHILE;
SET r_varString = REVERSE(v_Tmp);
set r_length=CHAR_LENGTH(r_varString);
WHILE r_length > 0 DO
IF (
((mid(r_varString,r_length,1) = '日') OR (mid(r_varString,r_length,1) = '分')) AND ((mid(r_varString,r_length,2) = '日|') OR (mid(r_varString,r_length,2) = '分|'))
) THEN
SET r_Tmp = concat(r_Tmp,',');
SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1));
ELSEIF (mid(r_varString,r_length,1) = '|')
THEN SET r_Tmp = concat(r_Tmp,'');
ELSE SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1));
END IF;
SET r_length = r_length - 1;
END WHILE;
SET s_varString = REVERSE(r_Tmp);
set s_length=CHAR_LENGTH(s_varString);
WHILE s_length > 0 DO
SET s_mid = mid(s_varString,s_length,1);
IF (
s_mid = ','
OR s_mid = '。'
OR s_mid = ';'
OR s_mid = '('
OR s_mid = ')'
OR s_mid = ':'
OR s_mid = '“'
OR s_mid = '”'
OR s_mid = ','
) THEN
SET s_Tmp = concat(s_Tmp,'|');
ELSE SET s_Tmp = concat(s_Tmp,s_mid);
END IF;
SET s_length = s_length - 1;
END WHILE;
RETURN REVERSE(s_Tmp);
END;