• Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Forums
  • Training
  • Create a Ticket
iNTERFACEWARE Help Center
  • Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Forums
  • Training
  • Create a Ticket

Code Repository

Home›Code Repository›rtf.lua
Modules

rtf.lua

Verified Featured
Added by iNTERFACEWARE

A module for converting an RTF file to plain text.

Source Code
local rtf={}
 
-- RTF module - used to convert RTF document into plain text.
-- http://help.interfaceware.com/code/details/rtf-lua
 
-- Build a membership set from a list: every value in the list becomes a key
-- mapped to true, so membership can be tested with set[value].
local function Set (list)
  local members = {}
  local index = 1
  local item = list[index]
  -- Walk the list from position 1 until the first missing entry.
  while item ~= nil do
    members[item] = true
    index = index + 1
    item = list[index]
  end
  return members
end
 
-- Control words that switch the current destination to IGNORE (see doControl).
-- The previous destination comes back when the enclosing group is closed and
-- popState restores the saved value.
local IgnoreSet = Set { 'info', 'fonttbl', 'colortbl', 'stylesheet', '*' }

-- Parser state shared by the functions below. rtf.toTxt() re-initialises
-- Out, StateStack, State and Dest at the start of every conversion.
local Out = '';          -- plain text collected so far
local StateStack = {};   -- saved destinations, one entry per open '{' group
local State;             -- current state of the state machine (see below)

-- These were previously assigned without a declaration, which made them
-- globals. Declaring them here keeps them private to the module.
local Dest          -- current character destination (USE or IGNORE)
local Token         -- control word currently being read
local Arg           -- numeric argument text of the current control word
local EscapedChar   -- hex digits collected for a \'hh escape
 
-- States of the state machine
local PLAINTEXT = 1
local CONTROL = 2
local ARGUMENT = 3
local BACKSLASH = 4
local ESCAPED_CHAR = 5
 
-- Character destinations
local USE = 0
local IGNORE = 1
 
-- To print an exception only the first time
local UnexpectedCharFound = false;
 
-- Select where subsequent characters go.
-- D is the new destination: putChar discards characters while the
-- destination equals IGNORE and collects them otherwise.
local function setDest (D)
   Dest = D
end
 
-- Remember the current destination on top of StateStack.
-- Called when a '{' opens a new group so popState can restore it later.
local function pushState ()
   local Saved = { Dest = Dest }
   StateStack[#StateStack + 1] = Saved
end
 
-- Restore the destination saved by the matching pushState.
-- Called when a '}' closes a group.
local function popState ()
   local EL = table.remove (StateStack)
   if EL == nil then
      -- Unbalanced '}' (more closers than openers): there is nothing to
      -- restore, so keep the current destination instead of raising an
      -- "attempt to index a nil value" error.
      return
   end
   setDest (EL['Dest'])
end
 
-- Append one character to the output unless the current destination is
-- IGNORE. A carriage return is stored as a line feed.
-- C is the character; B is accepted but not used here.
local function putChar (C, B)
   if Dest == IGNORE then
      return
   end
   Out = Out .. (C == '\r' and '\n' or C)
end
 
-- True when C contains a letter (Lua pattern class %a).
local function isAlpha (C)
   if string.find (C, "%a") then
      return true
   end
   return false
end
 
-- True when C contains a decimal digit (Lua pattern class %d).
local function isDigit (C)
   local Found = string.find (C, "%d")
   return Found ~= nil
end
 
-- True when C contains a whitespace character (Lua pattern class %s).
local function isSpace (C)
   if string.match (C, "%s") == nil then
      return false
   end
   return true
end
 
-- Process an RTF control word
-- T is token (the control word without its leading backslash)
-- A is argument (the numeric parameter text, if any; currently unused)
--
-- \par and \line both end a line of text and \tab is a tab character, so
-- they are written to the output; without this, text on either side of a
-- \line or \tab would run together. Control words listed in IgnoreSet switch
-- the destination to IGNORE. Every other control word is dropped.
local function doControl (T, A)
   if T == 'par' or T == 'line' then
      putChar ('\n')
   elseif T == 'tab' then
      putChar ('\t')
   elseif IgnoreSet[T] then
      setDest (IGNORE)
   end
end
 
-- Feed one character of the RTF stream into the state machine.
-- C is the character as a one-character string.
-- B is its byte value; it is only passed through to putChar.
local function feedChar (C, B)
   -- Handle a character in plain-text context: start a control sequence,
   -- open or close a group, or emit the character. When CheckSpace is true
   -- the character terminated a control word, and a whitespace terminator is
   -- swallowed instead of being emitted.
   local function nextState (C, B, CheckSpace)
      if C == '\\' then
         State = BACKSLASH
      elseif C == '{' then
         pushState ()
      elseif C == '}' then
         popState ()
      else
         if not CheckSpace or not isSpace (C) then
            putChar (C, B)
         end
      end
   end
 
   if State == PLAINTEXT then
      nextState (C, B, false)
   elseif State == BACKSLASH then
      if C == '\\' or C == '{' or C == '}' then
         -- Escaped literal backslash or brace
         putChar (C)
         State = PLAINTEXT
      elseif isAlpha (C) or C == '*' or C == '-' or C == '|' then
         State = CONTROL
         Token = C
         -- Forget the argument of the previous control word so that it is
         -- not handed to doControl for a word that has no argument.
         Arg = nil
      elseif C == "'" then
         -- \'hh : two hex digits follow
         State = ESCAPED_CHAR
         EscapedChar = ''
      elseif C == '~' then
         -- \~ is a non-breaking space. The state must go back to PLAINTEXT
         -- here; otherwise the next character would be read as if it
         -- followed a backslash.
         putChar (' ')
         State = PLAINTEXT
      else
         if (UnexpectedCharFound ~= true) then
            print ('Exception: unxepected '..C..' after \\')
            UnexpectedCharFound = true
         end
         -- Drop the unsupported control symbol and resume normal parsing
         -- rather than staying stuck in the BACKSLASH state.
         State = PLAINTEXT
      end
   elseif State == ESCAPED_CHAR then
      EscapedChar = EscapedChar..C
      if #EscapedChar == 2 then
         local Code = tonumber (EscapedChar, 16)
         -- Malformed escapes (non-hex digits or out-of-range values) are
         -- skipped; string.char would raise on them.
         if Code and Code >= 0 and Code <= 255 then
            putChar (string.char (Code))
         end
         State = PLAINTEXT
      end
   elseif State == CONTROL then
      if isAlpha (C) then
         Token = Token..C
      elseif isDigit (C) or C == '-' then
         State = ARGUMENT
         Arg = C
      else
         -- Control word ended without an argument
         doControl (Token, Arg)
         State = PLAINTEXT
         nextState (C, B, true)
      end
   elseif State == ARGUMENT then
      if isDigit (C) then
         Arg = Arg .. C
      else
         -- Control word and its argument are complete
         State = PLAINTEXT
         doControl (Token, Arg)
         nextState (C, B, true)
      end
   end
end
 
-- 
-- Public API
-- 
 
-- Convert an RTF document to plain text.
-- Data is the RTF document as a Lua string; the return value is the text
-- collected while feeding the document through the state machine.
function rtf.toTxt(Data)
   -- Start every conversion from a clean parser state.
   Out, StateStack, State = '', {}, PLAINTEXT
   setDest (USE)
 
   -- Physical line breaks in the file are removed before parsing.
   local NoCR = string.gsub (Data, '\r', '')
   Data = string.gsub (NoCR, '\n', '')
 
   -- Feed the document one byte at a time.
   for Pos = 1, #Data do
      local Byte = string.byte (Data, Pos)
      feedChar (string.char (Byte), Byte)
   end
   return Out
end

-- Help metadata describing rtf.toTxt: title, usage string, summary,
-- description, return value, parameters, an example and related links.
local rtf_toTxt = {
   Title="rtf.toTxt";
   Usage="rtf.toTxt(rtf)",
   SummaryLine="Converts an RTF to plain text.",
   Desc=[[Converts an RTF to plain text removing all formatting in the process.
   <p>The rtf.toTxt() function takes an RTF document, and strips out all the 
   formatting and non-text objects to produce plain text.
   ]];
   ["Returns"] = {
      {Desc="RTF as plain with all formatting removed  <u>string</u>."},
   };
   ParameterTable= true,
   Parameters= {
      {rtf= {Desc='A string containing RTF data <u>string</u>.'}},
   };
   Examples={
      [[   -- Read an rtf file into the Content string variable
   local FileName = iguana.project.root() ..'/'..iguana.project.guid()..'/sample.rtf'
   local F = io.open(FileName,'r')
   local Content = F:read('*a')
   F:close()
   
   -- Now convert the content into plain text.
   local Text = rtf.toTxt(Content)
   trace(Text)
   -- Of course formatting is lost...
   ]],
   };
   SeeAlso={
      {
         Title="rtf.lua - in our code repository",
         Link="http://help.interfaceware.com/code/details/rtf-lua"
      },
      {
         Title="RTF conversion example",
         Link="http://help.interfaceware.com/v6/rtf-conversion-example"
      }
   }
}

-- Pass the metadata above to help.set together with the function it documents.
-- NOTE(review): help.set is not defined in this file; presumably it is the
-- Iguana API that attaches help data to a function - confirm.
help.set{input_function=rtf.toTxt, help_data=rtf_toTxt}

 
return rtf
Description
A module for converting an RTF file to plain text.
Attachments
rtf.zip
Usage Details

The code contains an rtf.toTxt() function that takes an RTF document, and strips out all the formatting and non-text objects to produce plain text.

How to use rtf.lua:

  • Create a new shared module called “rtf” and paste in the code above
  • Add local rtf = require 'rtf' at the top of the main module
  • Test using the attached sample.rtf, or your own rtf file

Example code for main:

local rtf = require 'rtf'
-- Simple module to convert RTF file into text.
function main()
   -- Open the file explicitly and close it after reading. io.input() would
   -- leave it open as the process-wide default input. assert() raises with
   -- the system error message if the file cannot be opened.
   local F = assert(io.open('sample.rtf', 'r'))
   -- Keep the content in a local instead of leaking a global variable.
   local Rtf = F:read('*a')
   F:close()

   -- Now convert the content into plain text.
   local text=rtf.toTxt(Rtf)
   trace(text)
   -- Of course formatting is lost...
end
More Information
Converting RTF to plain text
Bookmark
  • Reviews
  • Related Listings
Filter
Sort by: Oldest First
  • Newest First
  • Rating
  • Helpfulness
Write a Review
Rating
Keyword
Filter
Sort by: Oldest First
  • Newest First
  • Title
  • Most Reviews
  • Highest Rated
Rating
iNTERFACEWARE
custom_merge.lua
Added by iNTERFACEWARE
Modules
A customizable database merge method for Iguana 5.5.1 and up.
dateparse.lua
Added by iNTERFACEWARE
Modules
A fuzzy date/time parser that is very useful for automatically translating a wide variety of date/time formats.
hl7.findSegment.lua
Added by iNTERFACEWARE
Modules
A utility for finding any HL7 segment in a parsed HL7 message node tree.
iguanaServer.lua
Added by iNTERFACEWARE
Modules
Provides programmatic access to various operations that can be performed on Iguana channels.
mime.lua
Added by iNTERFACEWARE
Modules
Sends MIME-encoded email attachments using the SMTP protocol. A wrapper around net.smtp.send.
retry.lua
Added by iNTERFACEWARE
Modules
A module for retrying operations which might periodically fail like database operations.
sha1.lua
Added by iNTERFACEWARE
Modules
A pure Lua-based implementation of the popular SHA-1 hashing function.
store.lua
Added by iNTERFACEWARE
Modules
The "original" store module: Allows you to store key/value pairs in a persistent storage mechanism. We recommend using the new store2 module instead.
stringutil.lua
Added by iNTERFACEWARE
Modules
A library of helpful extensions to the standard Lua string library.
dup.lua
Added by iNTERFACEWARE
Modules
Duplicate message filter.
xml.lua
Added by iNTERFACEWARE
Modules
A collection of helpful XML node functions.
urlcode.lua
Added by iNTERFACEWARE
Modules
A module for parsing URL encoded GET/POST sequences
csv_parse.lua
Added by iNTERFACEWARE
Modules
A module for parsing well-formed CSV files.
scrub.lua
Added by iNTERFACEWARE
Modules
The “scrub” module given below redacts sensitive information from HL7 messages.
throttleDB.lua
Added by iNTERFACEWARE
Modules
Throttle database access by reducing the number of inserts during peak hours
codemap.lua
Added by iNTERFACEWARE
Modules
This module is used to map one set of codes to another set of codes, or to validate code membership in a set
resubmit.lua
Added by iNTERFACEWARE
Modules
Resubmit a logged message to an Iguana channel using the unique reference number (refmsgid).
throttle.lua
Added by iNTERFACEWARE
Modules
Throttle a process during peak hours, by slowing down the code.
validate.lua
Added by iNTERFACEWARE
Modules
A template module for testing HL7 message conformance, you will need to extend it to match your requirements
scheduler.lua
Added by iNTERFACEWARE
Modules
Schedule jobs to run at a specified time of day, very useful for batch processing
Showing 1 - 20 of 32 results
«12»

Topics

  • expandGetting Started
  • expandAdministration
    • expandInstallation
    • expandLicensing
    • expandUpgrades
    • expandDeployment
    • expandConfiguration Management
      • expandCustom Configuration
    • expandBackup and Restore
    • expandSecurity
      • expandHIPAA Compliance
    • expandTroubleshooting
  • expandDeveloping Interfaces
    • expandArchitecture
    • expandInterfaces
      • expandHL7
      • expandDatabase
        • expandConnect
      • expandWeb Services
      • expandCDA
      • expandX12
      • expandOther Interfaces
      • expandUtilities
    • expandRepositories
      • expandBuiltin Repositories
        • expandIguana Upgrade
        • expandIguana Tutorials
        • expandIguana Tools
        • expandIguana Protocols
        • expandIguana Files
        • expandIguana Date/Time
        • expandIguana Webservices
        • expandIguana Excel
      • expandRemote Repositories
      • expandCS Team Repositories
        • expandIguana Channels
    • expandSample Code
      • expandModules
      • expandUsing built-in functions
      • expandWorking with XML
    • expandLua Programming
    • expandPerformance
  • expandFAQs and TIPs
    • expandFrequently Asked Questions
      • expandInstalls and Upgrades
      • expandWeb Services
      • expandConfiguration
      • expandChannels
      • expandTranslator
      • expandOther
      • expandDatabase
      • expandAdministration
      • expandLogs
      • expandChameleon
    • expandTips
      • expandChannels
      • expandChameleon
      • expandWeb Services
      • expandSecurity
      • expandProgramming
      • expandOther
      • expandAdministration
  • expandReference
    • expandIguana Enterprise and Professional
    • expandProgram Settings
    • expandChannel Settings
    • expandDashboard
    • expandChannels
    • expandTranslator
    • expandLogs
      • expandLog Encryption
    • expandHTTP API
    • expandCDA API
    • expandError Messages
    • expandChameleon
    • expandIguana Change Log

Other Links

  • Training Center
  • News & Announcements
  • iNTERFACEWARE Blog
  • Older Documentation (IGUANA v4 & Chameleon)
Copyright © iNTERFACEWARE Inc.