清除從 Word 粘貼過來的多餘 HTML 代碼的 CleanWord 函數

我們在利用FckEditor編輯器的時候會有一個清除從Word粘貼過來的多餘html代碼的功能，它是利用JavaScript編寫的。有了這項功能以後，我們的網頁內容可以直接從Word拷貝粘貼而不用擔心內容裏會有一大堆多餘的東西佔據資料庫空間影響網頁執行的性能了。

因此，我們參照 FCKeditor 的 JavaScript 實現，編寫了 CFScript 版本的 CleanWord 函數；在頁面內容入庫之前調用該函數，即可直接清除這些冗餘代碼。


<cfscript>
    /**
     * Strips Microsoft Word specific markup from an HTML fragment.
     *
     * The clean-up is a fixed sequence of case-insensitive regular-expression
     * replacements applied one after another; the order matters because later
     * rules rely on attributes/tags having been removed by earlier ones.
     *
     * @param html  HTML text (typically pasted from Word) to be cleaned.
     * @return      The cleaned HTML text.
     */
    function CleanWord(html)
    {
        var level = 0;
        var pass = 0;

        // Office paragraph markers: drop empty <o:p>, collapse non-empty ones to a space.
        // NOTE(review): the FCKeditor original emits "&nbsp;" here; the literal space
        // may be an entity that was decoded when this code was published - confirm.
        html = REReplaceNoCase(html, '<o:p>\s*<\/o:p>', '', 'all');
        html = REReplaceNoCase(html, '<o:p>.*?<\/o:p>', ' ', 'all');

        // Word-only CSS declarations (mso-*), and default margin/indent values.
        html = REReplaceNoCase(html, '\s*mso-[^:]+:[^;"]+;?', '', 'all');
        html = REReplaceNoCase(html, '\s*MARGIN: 0cm 0cm 0pt\s*;', '', 'all');
        html = REReplaceNoCase(html, '\s*MARGIN: 0cm 0cm 0pt\s*"', '"', 'all');
        html = REReplaceNoCase(html, '\s*TEXT-INDENT: 0cm\s*;', '', 'all');
        html = REReplaceNoCase(html, '\s*TEXT-INDENT: 0cm\s*"', '"', 'all');

        // Declarations that close a style attribute: keep only the closing quote.
        html = REReplaceNoCase(html, '\s*TEXT-ALIGN: [^\s;]+;?"', '"', 'all');
        html = REReplaceNoCase(html, '\s*PAGE-BREAK-BEFORE: [^\s;]+;?"', '"', 'all');
        html = REReplaceNoCase(html, '\s*FONT-VARIANT: [^\s;]+;?"', '"', 'all');

        // Tab stops, in both the terminated and unterminated forms.
        html = REReplaceNoCase(html, '\s*tab-stops:[^;"]*;?', '', 'all');
        html = REReplaceNoCase(html, '\s*tab-stops:[^"]*', '', 'all');

        // Font face attributes (quoted and unquoted) and font-family declarations.
        html = REReplaceNoCase(html, '\s*face="[^"]*"', '', 'all');
        html = REReplaceNoCase(html, '\s*face=[^ >]*', '', 'all');
        html = REReplaceNoCase(html, '\s*FONT-FAMILY:[^;"]*;?', '', 'all');

        // Remove class and style attributes. Group 3 holds whatever follows the
        // removed attribute inside the tag and must be re-emitted, otherwise
        // attributes such as href/src that come after it are lost.
        html = REReplaceNoCase(html, '<(\w[^>]*) class=([^ |>]*)([^>]*)', '<\1\3', 'all');
        html = REReplaceNoCase(html, '<(\w[^>]*) style="([^\"]*)"([^>]*)', '<\1\3', 'all');

        // Remove style attributes that have been emptied by the rules above.
        html = REReplaceNoCase(html, '\s*style="\s*"', '', 'all');

        // Spans that contain only a space collapse to a space; empty spans vanish.
        // NOTE(review): FCKeditor matches "&nbsp;" rather than a plain space here - confirm.
        html = REReplaceNoCase(html, '<SPAN\s*[^>]*>\s* \s*<\/SPAN>', ' ', 'all');
        html = REReplaceNoCase(html, '<SPAN\s*[^>]*><\/SPAN>', '', 'all');

        // Remove lang attributes, again keeping the remainder of the tag (group 3).
        html = REReplaceNoCase(html, '<(\w[^>]*) lang=([^ |>]*)([^>]*)', '<\1\3', 'all');

        // Unwrap attribute-less <span> and <font> elements.
        html = REReplaceNoCase(html, '<SPAN\s*>(.*?)<\/SPAN>', '\1', 'all');
        html = REReplaceNoCase(html, '<FONT\s*>(.*?)<\/FONT>', '\1', 'all');

        // Remove XML declarations and namespaced tags such as <o:p>, <v:shape>, <w:...>.
        html = REReplaceNoCase(html, '<\\?\?xml[^>]*>', '', 'all');
        html = REReplaceNoCase(html, '<\/?\w+:[^>]*>', '', 'all');

        // Drop empty headings, then rewrite <h1>..<h6> as <div><b><font size="6".."1">.
        html = REReplaceNoCase(html, '<H\d>\s*<\/H\d>', '', 'all');
        for (level = 1; level lte 6; level = level + 1)
        {
            html = REReplaceNoCase(
                html,
                '<H' & level & '([^>]*)>',
                '<div\1><b><font size="' & (7 - level) & '">',
                'all'
            );
        }
        html = REReplaceNoCase(html, '<\/H\d>', '</font></b></div>', 'all');

        // Inline formatting wrapped around a single space collapses to that space.
        // NOTE(review): FCKeditor matches "&nbsp;" rather than a plain space here - confirm.
        html = REReplaceNoCase(html, '<(U|I|STRIKE)> <\/\1>', ' ', 'all');

        // Remove empty elements. Run three times so that elements which only
        // become empty after their (empty) children are removed are also caught.
        for (pass = 1; pass lte 3; pass = pass + 1)
        {
            html = REReplaceNoCase(html, '<([^\s>]+)[^>]*>\s*<\/\1>', '', 'all');
        }

        // Convert paragraphs to <div> blocks.
        html = REReplaceNoCase(html, '(<P)([^>]*>.*?)(<\/P>)', '<div\2</div>', 'all');

        return html;
    }
</cfscript>