【发布时间】:2020-01-25 02:35:26
【问题描述】:
表 temp 具有“word”和“sentence”列。下面的代码检查句子中是否有单词列中的任何单词。如果该词存在,则该词将被替换为 URL(包含该词本身及其 id)。该代码适用于大约 1-10 行。该表有大约 50k 条记录。它消耗了整个临时空间。如何查看和微调查询?
要求:有50k个单词和句子。如果单词列中存在任何单词,则要求将句子中的单词替换为 URL(包含单词及其 id)。在查找单词时,搜索必须不区分大小写。另外,在用 URL 替换时,我们需要在句子中保留相同的大小写。
Create table temp(
id NUMBER,
word VARCHAR2(1000),
Sentence VARCHAR2(2000)
);
insert into temp
SELECT 1,'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
select 2,'test', 'test' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
SELECT 7,'Not Matched','testing testing testing' FROM DUAL
SQL 类型:
CREATE TYPE stringlist IS TABLE OF VARCHAR2(4000);
/
CREATE TYPE intlist IS TABLE OF NUMBER(20,0);
/
PLSQL 函数
CREATE FUNCTION replace_words(
word_list IN stringlist,
id_list IN intlist,
sentence IN temp.sentence%TYPE
) RETURN temp.sentence%TYPE
IS
p_sentence temp.sentence%TYPE := UPPER( sentence );
p_pos PLS_INTEGER := 1;
p_min_word_index PLS_INTEGER;
p_word_index PLS_INTEGER;
p_start PLS_INTEGER;
p_index PLS_INTEGER;
o_sentence temp.sentence%TYPE;
BEGIN
LOOP
p_min_word_index := NULL;
p_index := NULL;
FOR i IN 1 .. word_list.COUNT LOOP
p_word_index := p_pos;
LOOP
p_word_index := INSTR( p_sentence, word_list(i), p_word_index );
EXIT WHEN p_word_index = 0;
IF ( p_word_index > 1
AND REGEXP_LIKE( SUBSTR( p_sentence, p_word_index - 1, 1 ), '\w' )
)
OR REGEXP_LIKE( SUBSTR( p_sentence, p_word_index + LENGTH( word_list(i) ), 1 ), '\w' )
THEN
p_word_index := p_word_index + 1;
CONTINUE;
END IF;
IF p_min_word_index IS NULL OR p_word_index < p_min_word_index THEN
p_min_word_index := p_word_index;
p_index := i;
END IF;
EXIT;
END LOOP;
END LOOP;
IF p_index IS NULL THEN
o_sentence := o_sentence || SUBSTR( sentence, p_pos );
EXIT;
ELSE
o_sentence := o_sentence
|| SUBSTR( sentence, p_pos, p_min_word_index - p_pos )
|| 'http://localhost/'
|| id_list(p_index)
|| '/<u>'
|| SUBSTR( sentence, p_min_word_index, LENGTH( word_list( p_index ) ) )
|| '</u>';
p_pos := p_min_word_index + LENGTH( word_list( p_index ) );
END IF;
END LOOP;
RETURN o_sentence;
END;
/
合并
MERGE INTO temp dst
USING (
WITH lists ( word_list, id_list ) AS (
SELECT CAST(
COLLECT(
UPPER( word )
ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
)
AS stringlist
),
CAST(
COLLECT(
id
ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
)
AS intlist
)
FROM temp
)
SELECT t.ROWID rid,
replace_words(
word_list,
id_list,
sentence
) AS replaced_sentence
FROM temp t
CROSS JOIN lists
) src
ON ( dst.ROWID = src.RID )
WHEN MATCHED THEN
UPDATE SET sentence = src.replaced_sentence;
【问题讨论】:
-
请显示任何错误,并输出解释计划。谢谢
-
添加了解释计划。我得到 ORA-06502: PL/SQL: numeric or value error: string buffer too small and ORA-01652: unable to extend temp segment by 128 in tablespace TEMP.
-
我之前告诉过你,将一个表中的单词和另一个表中的句子分开。您当前的输入结构没有意义,并且使解决方案更加困难。
-
您需要“审查和微调”您的需求。您在其他问题中说搜索“单词”不区分大小写,但您希望 URL 保留句子中“单词”的大小写。您没有在此处重复该部分要求。您的要求本身就够难了:愚蠢的输入结构和不完整的要求无济于事。
-
嗨炖肉,如果不清楚,请道歉。有50k个单词和句子。你认为用不同案例的单词创建一个表格是一个可行的解决方案吗?例如电子商务(我们需要在新表中创建的表单是电子商务、电子商务、电子商务、电子商务)。如果单词列中存在任何单词,则要求将句子中的单词替换为 URL(包含单词及其 id)。在查找单词时,搜索必须不区分大小写。此外,我们需要在用 URL 替换时在句子中保留相同的大小写。
标签: sql oracle replace database-performance cross-join