Introduction
Converting from RTF with the Translator is straightforward. The following example shows a function that takes an RTF document and strips out all the formatting and non-text objects to produce plain text.
As you can see, it is as simple as loading the file and making a single function call.

Note: The code below uses print() to log messages (the rtf module reports an unexpected character this way). When an Iguana channel is run, print() messages are logged as informational messages. For more sophisticated logging, you can use the iguana.logInfo(), iguana.logWarning(), and iguana.logDebug() functions.
Source Code [top]
Here is the source code for the rtf module. To use it:
- Create a new shared module called “rtf”, then copy and paste in the code below.
- Add the code local rtf = require('rtf') at the top of the main module.
- Test using sample.rtf, or your own rtf file.
Code for main:
local rtf = require('rtf')
-- Entry point: loads sample.rtf, converts it to plain text with the rtf
-- module and hands the result to trace().
-- NOTE(review): trace() is not defined in this file; presumably it is
-- supplied by the Iguana Translator environment -- confirm.
function main()
   -- Open the file explicitly instead of redirecting the process-wide
   -- default input with io.input(), and close it as soon as it is read.
   local File = assert(io.open('sample.rtf', 'r'))
   -- Keep the document in a local rather than leaking a global named Rtf.
   local Rtf = File:read('*a')
   File:close()
   local text = rtf.toTxt(Rtf)
   trace(text)
end
Code for rtf module:
-- Module table; the public API is attached to it and it is returned at the
-- end of the file.
local rtf = {}

--
-- Implementation
--
-- Builds a membership table from an array: every element of `list`
-- becomes a key whose value is true, so lookups are a plain index.
local function Set (list)
   local members = {}
   for _, item in ipairs(list) do
      members[item] = true
   end
   return members
end
-- Control words that switch the current destination to IGNORE.
local IgnoreSet = Set({ 'info', 'fonttbl', 'colortbl', 'stylesheet', '*' })
-- Plain text collected so far.
local Out = ''
-- Stack of saved destinations: pushed on '{', popped on '}'.
local StateStack = {}
-- Current state of the parser's state machine.
local State
-- States of the state machine
local PLAINTEXT, CONTROL, ARGUMENT, BACKSLASH, ESCAPED_CHAR = 1, 2, 3, 4, 5
-- Character destinations: USE collects characters, IGNORE discards them
local USE, IGNORE = 0, 1
-- Latch so the "unexpected character" message is printed only once
local UnexpectedCharFound = false
-- Current character destination (IGNORE or USE). Declared as a file-level
-- local so it no longer leaks into the global environment; the functions
-- defined below this point read it as an upvalue.
local Dest

-- Sets current character destination (IGNORE or USE)
-- D: the new destination value.
local function setDest (D)
   Dest = D
end
-- Saves the current destination on top of StateStack so that a later
-- popState() can restore it.
local function pushState ()
   local Saved = { Dest = Dest }
   StateStack[#StateStack + 1] = Saved
end
-- Restores the destination saved by the most recent pushState().
-- If the stack is already empty (a '}' with no matching '{' in malformed
-- input) the current destination is left unchanged instead of raising an
-- "attempt to index a nil value" error.
local function popState ()
   local EL = table.remove (StateStack)
   if EL == nil then
      return
   end
   setDest (EL['Dest'])
end
-- Appends character C to the output unless the current destination is
-- IGNORE. A carriage return is stored as a line feed.
-- B is accepted for call compatibility and is not used here.
local function putChar (C, B)
   if Dest == IGNORE then
      return
   end
   if C == '\r' then
      Out = Out .. '\n'
   else
      Out = Out .. C
   end
end
-- True when C contains a letter (Lua pattern class %a), false otherwise.
local function isAlpha (C)
   if string.find (C, "%a") then
      return true
   end
   return false
end
-- True when C contains a decimal digit (Lua pattern class %d), false otherwise.
local function isDigit (C)
   if string.find (C, "%d") then
      return true
   end
   return false
end
-- True when C contains a whitespace character (Lua pattern class %s),
-- false otherwise.
local function isSpace (C)
   if string.find (C, "%s") then
      return true
   end
   return false
end
-- Acts on a completed RTF control word.
-- T is the control word (token); A is its argument, which is currently
-- not consulted.
--   'par'                 -> emits a newline
--   any word in IgnoreSet -> switches the destination to IGNORE
--   anything else         -> no effect
local function doControl (T, A)
   if T == 'par' then
      putChar ('\n')
      return
   end
   if IgnoreSet[T] then
      setDest (IGNORE)
   end
end
-- Scratch registers of the state machine. They must survive between calls
-- to feedChar, so they are file-level locals rather than accidental globals.
local Token        -- control word currently being accumulated
local Arg          -- text of the control word's numeric argument, nil if none
local EscapedChar  -- hex digits collected so far after \'

-- Feeds one character of the RTF stream to the state machine.
-- C is the character as a one-character string; B is its byte value, which
-- is only forwarded to putChar.
local function feedChar (C, B)
   -- Handles a character in plain-text context: a backslash starts a
   -- control sequence, braces save/restore the destination, anything else
   -- is output. With CheckSpace set, a whitespace character is dropped
   -- (it is the delimiter that ended a control word).
   local function nextState (C, B, CheckSpace)
      if C == '\\' then
         State = BACKSLASH
      elseif C == '{' then
         pushState ()
      elseif C == '}' then
         popState ()
      elseif not CheckSpace or not isSpace (C) then
         putChar (C, B)
      end
   end

   if State == PLAINTEXT then
      nextState (C, B, false)
   elseif State == BACKSLASH then
      if C == '\\' or C == '{' or C == '}' then
         -- Escaped literal: \\ \{ \}
         putChar (C)
         State = PLAINTEXT
      elseif isAlpha (C) or C == '*' or C == '-' or C == '|' then
         State = CONTROL
         Token = C
         -- Do not let the previous control word's argument leak into this one.
         Arg = nil
      elseif C == "'" then
         State = ESCAPED_CHAR
         EscapedChar = ''
      elseif C == '~' then
         -- \~ is output as an ordinary space.
         putChar (' ')
         State = PLAINTEXT
      else
         if not UnexpectedCharFound then
            print ('Exception: unxepected '..C..' after \\')
            UnexpectedCharFound = true
         end
         -- Drop the character and resume plain text; otherwise the text
         -- that follows would be misread as a control word.
         State = PLAINTEXT
      end
   elseif State == ESCAPED_CHAR then
      EscapedChar = EscapedChar..C
      if #EscapedChar == 2 then
         -- Only emit when both characters are hex digits; a malformed
         -- escape is skipped instead of crashing in string.char.
         local Code = string.match (EscapedChar, '^%x%x$')
            and tonumber (EscapedChar, 16)
         if Code then
            putChar (string.char (Code))
         end
         State = PLAINTEXT
      end
   elseif State == CONTROL then
      if isAlpha (C) then
         Token = Token..C
      elseif isDigit (C) or C == '-' then
         State = ARGUMENT
         Arg = C
      else
         doControl (Token, Arg)
         -- Must be set before nextState, which may switch to BACKSLASH.
         State = PLAINTEXT
         nextState (C, B, true)
      end
   elseif State == ARGUMENT then
      if isDigit (C) then
         Arg = Arg .. C
      else
         -- Must be set before nextState, which may switch to BACKSLASH.
         State = PLAINTEXT
         doControl (Token, Arg)
         nextState (C, B, true)
      end
   end
end
--
-- Public API
--
-- Given an RTF document as a Lua string (Data), return the text
-- portion of the document.
-- The parser state (output, state stack, state, destination) is reset on
-- every call. Carriage returns and line feeds in the raw input are
-- removed before parsing, so line breaks in the result come only from
-- what the parser emits.
function rtf.toTxt(Data)
   Out = ''
   StateStack = { }
   State = PLAINTEXT
   setDest (USE)
   -- Strip CR and LF in a single pass; the parentheses discard gsub's
   -- second return value (the substitution count).
   Data = (string.gsub (Data, '[\r\n]', ''))
   for i = 1, #Data do
      local B = string.byte (Data, i)
      feedChar (string.char (B), B)
   end
   return Out
end
return rtf