• Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Training
  • Create a Ticket
iNTERFACEWARE Help Center
  • Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Training
  • Create a Ticket

Code Repository

Home›Code Repository›rtf.lua
Modules

rtf.lua

Verified Featured
Added by iNTERFACEWARE

A module for converting an RTF file to plain text.

Source Code
-- Module table: rtf.toTxt is attached to it below and the table is returned
-- at the end of the file.
local rtf={}
 
-- RTF module - used to convert RTF document into plain text.
-- http://help.interfaceware.com/code/details/rtf-lua
 
-- Build a lookup table from an array: each element of `list` becomes a key
-- whose value is true, so membership can be tested with set[value].
local function Set (list)
  local members = {}
  local index = 1
  local item = list[index]
  -- Walk the array part until the first missing element
  while item ~= nil do
    members[item] = true
    index = index + 1
    item = list[index]
  end
  return members
end
 
-- Which control sequences to ignore
local IgnoreSet = Set { 'info', 'fonttbl', 'colortbl', 'stylesheet', '*' }
-- Text collected so far by the conversion in progress
local Out = '';
-- Saved destinations, one entry per '{' group that is currently open
local StateStack = {};
-- Current state of the state machine
local State;
-- Current character destination. Declared local here so that the functions
-- defined below share it as an upvalue instead of creating and reading a
-- global variable named Dest.
local Dest;
 
-- State machine states: the value held in State selects how the next
-- character is interpreted.
local PLAINTEXT, CONTROL, ARGUMENT, BACKSLASH, ESCAPED_CHAR = 1, 2, 3, 4, 5

-- Character destinations
local USE, IGNORE = 0, 1

-- Becomes true once the unexpected-character message has been printed, so
-- that the message is printed only the first time
local UnexpectedCharFound = false
 
-- Record D (IGNORE or USE) as the destination for the characters that follow
local function setDest (D)
   Dest = D
end
 
-- Save the current destination on top of StateStack (called for each '{')
local function pushState ()
   local Saved = { Dest = Dest }
   StateStack[#StateStack + 1] = Saved
end
 
-- Restore the destination saved by the matching pushState (called for each
-- '}').
-- When the stack is empty, i.e. the input contains more '}' than '{', there
-- is nothing to restore: the current destination is kept instead of failing
-- with an "attempt to index a nil value" error.
local function popState ()
   local EL = table.remove (StateStack)
   if EL == nil then
      return
   end
   setDest (EL['Dest'])
end
 
-- Append character C to the output unless the current destination is IGNORE.
-- A carriage return is stored as a line feed. B is accepted but not used.
local function putChar (C, B)
   local Ch = C
   if Ch == '\r' then Ch = '\n' end
   if Dest == IGNORE then
      return
   end
   Out = Out..Ch
end
 
-- True when C contains a letter (Lua character class %a)
local function isAlpha (C)
   local Found = string.find (C, "%a")
   return Found ~= nil
end
 
-- True when C contains a decimal digit (Lua character class %d)
local function isDigit (C)
   local Found = string.find (C, "%d")
   return Found ~= nil
end
 
-- True when C contains a whitespace character (Lua character class %s)
local function isSpace (C)
   local Found = string.find (C, "%s")
   return Found ~= nil
end
 
-- Handle one RTF control word.
-- T is the control word (token); A is its argument, which is not used here.
-- 'par' emits a newline, a word listed in IgnoreSet switches the destination
-- to IGNORE, and any other word has no effect.
local function doControl (T, A)
   if T == 'par' then
      putChar ('\n')
      return
   end
   if IgnoreSet[T] then
      setDest (IGNORE)
   end
end
 
-- Scratch values of the tokenizer. They are declared local so that feedChar
-- does not create global variables with these names.
local Token        -- control word being collected
local Arg          -- argument text of the current control word, if any
local EscapedChar  -- hex digits collected after \'

-- Feed one character of the RTF stream to the state machine.
-- C is the character as a one-character string and B is its byte value; B is
-- only passed through to putChar.
local function feedChar (C, B)
   -- Handle a character that is not part of a control word: start an escape
   -- on '\', save or restore the destination on '{' / '}', otherwise emit
   -- the character. When CheckSpace is true a whitespace character is
   -- dropped (it is the character that ended a control word).
   local function nextState (C, B, CheckSpace)
      if C == '\\' then
         State = BACKSLASH
      elseif C == '{' then
         pushState ()
      elseif C == '}' then
         popState ()
      else
         if not CheckSpace or not isSpace (C) then
            putChar (C, B)
         end
      end
   end

   if State == PLAINTEXT then
      nextState (C, B, false)
   elseif State == BACKSLASH then
      if C == '\\' or C == '{' or C == '}' then
         -- Escaped literal backslash or brace
         putChar (C)
         State = PLAINTEXT
      elseif isAlpha (C) or C == '*' or C == '-' or C == '|' then
         -- Start of a control word; forget the previous word's argument
         State = CONTROL
         Token = C
         Arg = nil
      elseif C == "'" then
         -- \'hh: two hex digits follow
         State = ESCAPED_CHAR
         EscapedChar = ''
      elseif C == '~' then
         -- \~ is emitted as a plain space, then parsing continues as text
         putChar (' ')
         State = PLAINTEXT
      else
         if (UnexpectedCharFound ~= true) then
            print ('Exception: unxepected '..C..' after \\')
            UnexpectedCharFound = true
         end
         -- The unknown symbol is consumed; go back to plain text so the
         -- characters after it are not read as part of a control word
         State = PLAINTEXT
      end
   elseif State == ESCAPED_CHAR then
      EscapedChar = EscapedChar..C
      if #EscapedChar == 2 then
         -- tonumber returns nil when the two characters are not valid hex;
         -- in that case nothing is emitted instead of raising an error
         local Code = tonumber (EscapedChar, 16)
         if Code then
            putChar (string.char (Code))
         end
         State = PLAINTEXT
      end
   elseif State == CONTROL then
      if isAlpha (C) then
         Token = Token..C
      elseif isDigit (C) or C == '-' then
         State = ARGUMENT
         Arg = C
      else
         -- Any other character ends the control word and is then handled
         -- as ordinary input
         doControl (Token, Arg)
         State = PLAINTEXT
         nextState (C, B, true)
      end
   elseif State == ARGUMENT then
      if isDigit (C) then
         Arg = Arg .. C
      else
         -- End of the argument: run the control word, then handle C
         State = PLAINTEXT
         doControl (Token, Arg)
         nextState (C, B, true)
      end
   end
end
 
-- 
-- Public API
-- 
 
-- Convert an RTF document, passed as the Lua string Data, to plain text and
-- return that text.
function rtf.toTxt(Data)
   -- Start every conversion from a clean state
   Out, StateStack, State = '', { }, PLAINTEXT
   setDest (USE)

   -- Strip carriage returns and line feeds before parsing
   Data = string.gsub (Data, '[\r\n]', '')

   -- Feed the document to the state machine one byte at a time
   local Length = #Data
   local Pos = 1
   while Pos <= Length do
      local Byte = string.byte (Data, Pos)
      feedChar (string.char (Byte), Byte)
      Pos = Pos + 1
   end
   return Out
end

-- Help metadata for rtf.toTxt; it is passed to help.set below.
-- The Desc and Examples entries are long-bracket strings, so their line
-- breaks and indentation are part of the string values.
local rtf_toTxt = {
   Title="rtf.toTxt";
   Usage="rtf.toTxt(rtf)",
   SummaryLine="Converts an RTF to plain text.",
   Desc=[[Converts an RTF to plain text removing all formatting in the process.
   <p>The rtf.toTxt() function takes an RTF document, and strips out all the 
   formatting and non-text objects to produce plain text.
   ]];
   ["Returns"] = {
      {Desc="RTF as plain with all formatting removed  <u>string</u>."},
   };
   ParameterTable= true,
   Parameters= {
      {rtf= {Desc='A string containing RTF data <u>string</u>.'}},
   };
   Examples={
      [[   -- Read an rtf file into the Content string variable
   local FileName = iguana.project.root() ..'/'..iguana.project.guid()..'/sample.rtf'
   local F = io.open(FileName,'r')
   local Content = F:read('*a')
   F:close()
   
   -- Now convert the content into plain text.
   local Text = rtf.toTxt(Content)
   trace(Text)
   -- Of course formatting is lost...
   ]],
   };
   SeeAlso={
      {
         Title="rtf.lua - in our code repository",
         Link="http://help.interfaceware.com/code/details/rtf-lua"
      },
      {
         Title="RTF conversion example",
         Link="http://help.interfaceware.com/v6/rtf-conversion-example"
      }
   }
}

-- Associate the help data above with the rtf.toTxt function
help.set{input_function=rtf.toTxt, help_data=rtf_toTxt}

 
-- Hand the module table back to the caller of require
return rtf
Description
A module for converting an RTF file to plain text.
Attachments
rtf.zip
Usage Details

The code contains an rtf.toTxt() function that takes an RTF document, and strips out all the formatting and non-text objects to produce plain text.

How to use rtf.lua:

  • Create a new shared module called “rtf” and paste in the code above
  • Add local rtf = require 'rtf' at the top of the main module
  • Test using the attached sample.rtf, or your own rtf file

Example code for main:

local rtf = require 'rtf'
-- Simple module to convert RTF file into text.
-- Example entry point: read sample.rtf, convert it to plain text with
-- rtf.toTxt and show the result with trace.
function main()
   -- Open the file explicitly so the handle can be closed again, and keep
   -- the content in a local instead of a global variable.
   -- assert raises with the message from io.open if the file cannot be opened.
   local F = assert(io.open('sample.rtf', 'r'))
   local Rtf = F:read('*a')
   F:close()

   -- Now convert the content into plain text.
   local text = rtf.toTxt(Rtf)
   trace(text)
   -- Of course formatting is lost...
end
More Information
Converting RTF to plain text
Bookmark
  • Reviews
  • Related Listings
Filter
Sort by: Newest First
  • Oldest First
  • Rating
  • Helpfulness
Write a Review
Rating
Keyword
Filter
Sort by: Newest First
  • Oldest First
  • Title
  • Most Reviews
  • Highest Rated
Rating
iNTERFACEWARE
xml.lua
Added by iNTERFACEWARE
Modules
A collection of helpful XML node functions.
dup.lua
Added by iNTERFACEWARE
Modules
Duplicate message filter.
stringutil.lua
Added by iNTERFACEWARE
Modules
A library of helpful extensions to the standard Lua string library.
store.lua
Added by iNTERFACEWARE
Modules
The "original" store module: Allows you to store key/value pairs in a persistent storage mechanism. We recommend using the new store2 module instead.
sha1.lua
Added by iNTERFACEWARE
Modules
A pure Lua-based implementation of the popular SHA-1 hashing function.
retry.lua
Added by iNTERFACEWARE
Modules
A module for retrying operations which might periodically fail like database operations.
mime.lua
Added by iNTERFACEWARE
Modules
Sends MIME-encoded email attachments using the SMTP protocol. A wrapper around net.smtp.send.
iguanaServer.lua
Added by iNTERFACEWARE
Modules
Provides programmatic access to various operations that can be performed on Iguana channels.
hl7.findSegment.lua
Added by iNTERFACEWARE
Modules
A utility for finding any HL7 segment in a parsed HL7 message node tree.
dateparse.lua
Added by iNTERFACEWARE
Modules
A fuzzy date/time parser that is very useful for automatically translating a wide variety of date/time formats.
custom_merge.lua
Added by iNTERFACEWARE
Modules
A customizable database merge method for Iguana 5.5.1 and up.
Showing 21 - 31 of 31 results
«12»

Topics

  • expandGetting Started
  • expandAdministration
    • expandInstallation
    • expandLicensing
    • expandUpgrades
    • expandDeployment
    • expandConfiguration Management
      • expandCustom Configuration
    • expandBackup and Restore
    • expandSecurity
      • expandHIPAA Compliance
    • expandTroubleshooting
  • expandDeveloping Interfaces
    • expandArchitecture
    • expandInterfaces
      • expandHL7
      • expandDatabase
        • expandConnect
      • expandWeb Services
      • expandCDA
      • expandX12
      • expandOther Interfaces
      • expandUtilities
    • expandRepositories
      • expandBuiltin Repositories
        • expandIguana Upgrade
        • expandIguana Tutorials
        • expandIguana Tools
        • expandIguana Protocols
        • expandIguana Files
        • expandIguana Date/Time
        • expandIguana Webservices
        • expandIguana Excel
      • expandRemote Repositories
      • expandCS Team Repositories
        • expandIguana Channels
    • expandSample Code
      • expandModules
      • expandUsing built-in functions
      • expandWorking with XML
    • expandLua Programming
    • expandPerformance
  • expandFAQs and TIPs
    • expandFrequently Asked Questions
      • expandInstalls and Upgrades
      • expandWeb Services
      • expandConfiguration
      • expandChannels
      • expandTranslator
      • expandOther
      • expandDatabase
      • expandAdministration
      • expandLogs
      • expandChameleon
    • expandTips
      • expandChannels
      • expandChameleon
      • expandWeb Services
      • expandSecurity
      • expandProgramming
      • expandOther
      • expandAdministration
  • expandReference
    • expandIguana Enterprise and Professional
    • expandProgram Settings
    • expandChannel Settings
    • expandDashboard
    • expandChannels
    • expandTranslator
    • expandLogs
      • expandLog Encryption
    • expandHTTP API
    • expandCDA API
    • expandError Messages
    • expandChameleon
    • expandIguana Change Log

Other Links

  • Training Center
  • News & Announcements
  • iNTERFACEWARE Blog
  • Older Documentation (IGUANA v4 & Chameleon)
Copyright © iNTERFACEWARE Inc.