• Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Training
  • Create a Ticket
iNTERFACEWARE Help Center
  • Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Training
  • Create a Ticket

Code Repository

Home›Code Repository›rtf.lua
Modules

rtf.lua

Verified Featured
Added by iNTERFACEWARE

A module for converting an RTF file to plain text.

Source Code
local rtf={}
 
-- RTF module - converts an RTF document into plain text.
-- The public function rtf.toTxt() is attached to this table, which the
-- file returns at the end.
-- http://help.interfaceware.com/code/details/rtf-lua
 
-- Builds a lookup table from an array: every array element becomes a key
-- mapped to true, so membership can be tested with `set[value]`.
local function Set (list)
  local members = {}
  local index = 1
  local item = list[index]
  -- Walk the array part until the first missing entry.
  while item ~= nil do
    members[item] = true
    index = index + 1
    item = list[index]
  end
  return members
end
 
-- Control words (and the '*' marker) whose group contents are skipped
-- instead of being copied to the output.
local IgnoreSet = Set { 'info', 'fonttbl', 'colortbl', 'stylesheet', '*' }

-- Parser state shared by the functions in this file. rtf.toTxt() resets
-- Out, StateStack, State and Dest at the start of every conversion.
local Out = ''          -- text collected so far
local StateStack = {}   -- saved destinations, one entry per open '{'
local State             -- current state of the state machine (see below)

-- These are assigned by the functions further down. Declaring them here
-- keeps them file-local; without the declarations those assignments
-- would silently create global variables.
local Dest              -- current character destination (USE or IGNORE)
local Token             -- control word currently being read
local Arg               -- numeric argument of the current control word
local EscapedChar       -- hex digits collected after \'

-- States of the state machine
local PLAINTEXT = 1     -- reading ordinary text
local CONTROL = 2       -- reading the letters of a control word
local ARGUMENT = 3      -- reading the numeric argument of a control word
local BACKSLASH = 4     -- a backslash was just read
local ESCAPED_CHAR = 5  -- reading the two hex digits of \'hh

-- Character destinations
local USE = 0           -- characters are appended to the output
local IGNORE = 1        -- characters are discarded

-- To print an exception only the first time
local UnexpectedCharFound = false
 
-- Selects where subsequent characters go.
-- D is USE (keep characters in the output) or IGNORE (discard them).
local function setDest (D)
   Dest = D
end
 
-- Saves the current destination on top of StateStack. Called when a '{'
-- opens a new group so the destination can be restored at the matching '}'.
local function pushState ()
   local Saved = { Dest = Dest }
   StateStack[#StateStack + 1] = Saved
end
 
-- Restores the destination that was saved by the matching pushState().
-- Called when a '}' closes a group.
local function popState ()
   local Saved = table.remove (StateStack)
   if Saved == nil then
      -- Unbalanced '}' (more closing than opening braces): there is nothing
      -- to restore, so keep the current destination instead of raising
      -- "attempt to index a nil value".
      return
   end
   setDest (Saved.Dest)
end
 
-- Appends one character to the output unless the current destination is
-- IGNORE. A carriage return is stored as a line feed.
-- C is the character; B (its byte value) is accepted but not used.
local function putChar (C, B)
   if Dest == IGNORE then
      return
   end
   Out = Out .. (C == '\r' and '\n' or C)
end
 
-- True when C contains a letter (Lua pattern class %a).
local function isAlpha (C)
   local Position = string.find (C, '%a')
   return Position ~= nil
end
 
-- True when C contains a decimal digit (Lua pattern class %d).
local function isDigit (C)
   local Position = string.find (C, '%d')
   return Position ~= nil
end
 
-- True when C contains a whitespace character (Lua pattern class %s).
local function isSpace (C)
   local Position = string.find (C, '%s')
   return Position ~= nil
end
 
-- Acts on a completed RTF control word.
-- T is the control word (token) without the leading backslash.
-- A is its argument; it is accepted but not used here.
-- 'par' emits a line feed; a word listed in IgnoreSet switches the
-- destination to IGNORE; every other word has no effect.
local function doControl (T, A)
   if T == 'par' then
      putChar ('\n')
      return
   end
   if IgnoreSet[T] then
      setDest (IGNORE)
   end
end
 
-- Control word being read, its numeric argument, and the hex digits
-- collected after \'. They must survive between feedChar calls because a
-- token spans several characters; declaring them local here keeps feedChar
-- from creating global variables.
local Token, Arg, EscapedChar

-- Feeds one character of RTF input into the state machine.
-- C is the character as a one-character string.
-- B is its byte value; it is only passed through to putChar.
local function feedChar (C, B)
   -- Handles a character that is not part of a control word: a backslash
   -- starts a control sequence, braces open/close a group, and anything else
   -- is output. When CheckSpace is true (the character directly follows a
   -- control word) a whitespace character is swallowed as the delimiter.
   local function nextState (C, B, CheckSpace)
      if C == '\\' then
         State = BACKSLASH
      elseif C == '{' then
         pushState ()
      elseif C == '}' then
         popState ()
      elseif not (CheckSpace and isSpace (C)) then
         putChar (C, B)
      end
   end

   if State == PLAINTEXT then
      nextState (C, B, false)
   elseif State == BACKSLASH then
      if C == '\\' or C == '{' or C == '}' then
         -- Escaped literal: \\ \{ \}
         putChar (C)
         State = PLAINTEXT
      elseif isAlpha (C) or C == '*' or C == '-' or C == '|' then
         State = CONTROL
         Token = C
         -- Start without an argument so a control word that has none does
         -- not inherit the argument of the previous control word.
         Arg = nil
      elseif C == "'" then
         State = ESCAPED_CHAR
         EscapedChar = ''
      elseif C == '~' then
         -- \~ is output as an ordinary space. The state must be written to
         -- State (capital S); a lowercase name would leave the machine
         -- stuck in BACKSLASH.
         putChar (' ')
         State = PLAINTEXT
      else
         if (UnexpectedCharFound ~= true) then
            print ('Exception: unxepected '..C..' after \\')
            UnexpectedCharFound = true
         end
         -- The unknown symbol is dropped; return to plain text so the
         -- following character is not mistaken for a control word.
         State = PLAINTEXT
      end
   elseif State == ESCAPED_CHAR then
      EscapedChar = EscapedChar..C
      if #EscapedChar == 2 then
         -- tonumber returns nil for non-hex input; skip the character in
         -- that case instead of letting string.char raise an error.
         local Code = tonumber (EscapedChar, 16)
         if Code then
            putChar (string.char (Code))
         end
         State = PLAINTEXT
      end
   elseif State == CONTROL then
      if isAlpha (C) then
         Token = Token..C
      elseif isDigit (C) or C == '-' then
         State = ARGUMENT
         Arg = C
      else
         -- Control word finished without an argument.
         doControl (Token, Arg)
         State = PLAINTEXT
         nextState (C, B, true)
      end
   elseif State == ARGUMENT then
      if isDigit (C) then
         Arg = Arg .. C
      else
         -- Control word and its argument are complete.
         State = PLAINTEXT
         doControl (Token, Arg)
         nextState (C, B, true)
      end
   end
end
 
-- 
-- Public API
-- 
 
-- Given an RTF document as a Lua string (Data), return the text
-- portion of the document.
-- All parser state is reset first, so the function can be called repeatedly.
function rtf.toTxt(Data)
   Out = ''
   StateStack = { }
   State = PLAINTEXT
   setDest (USE)

   -- Raw CR/LF characters are removed before parsing (one pass for both);
   -- line breaks in the output come only from the \par control word.
   Data = string.gsub (Data, '[\r\n]', '')

   for i = 1, #Data do
      local B = string.byte (Data, i)
      feedChar (string.char (B), B)
   end

   -- A control word is only acted upon when the character after it is seen.
   -- If the input ends inside one (e.g. a trailing \par), feed a delimiter
   -- space so it is processed; the space itself is swallowed by feedChar.
   if State == CONTROL or State == ARGUMENT then
      feedChar (' ', 32)
   end
   return Out
end

-- Help metadata for rtf.toTxt: title, usage line, description, return
-- value, parameters, an example and related links.
local ToTxtHelp = {
   Title = "rtf.toTxt",
   Usage = "rtf.toTxt(rtf)",
   SummaryLine = "Converts an RTF to plain text.",
   Desc = [[Converts an RTF to plain text removing all formatting in the process.
   <p>The rtf.toTxt() function takes an RTF document, and strips out all the 
   formatting and non-text objects to produce plain text.
   ]],
   Returns = {
      { Desc = "RTF as plain with all formatting removed  <u>string</u>." },
   },
   ParameterTable = true,
   Parameters = {
      { rtf = { Desc = 'A string containing RTF data <u>string</u>.' } },
   },
   Examples = {
      [[   -- Read an rtf file into the Content string variable
   local FileName = iguana.project.root() ..'/'..iguana.project.guid()..'/sample.rtf'
   local F = io.open(FileName,'r')
   local Content = F:read('*a')
   F:close()
   
   -- Now convert the content into plain text.
   local Text = rtf.toTxt(Content)
   trace(Text)
   -- Of course formatting is lost...
   ]],
   },
   SeeAlso = {
      {
         Title = "rtf.lua - in our code repository",
         Link = "http://help.interfaceware.com/code/details/rtf-lua",
      },
      {
         Title = "RTF conversion example",
         Link = "http://help.interfaceware.com/v6/rtf-conversion-example",
      },
   },
}

-- Attach the help metadata to rtf.toTxt. `help` is not defined in this
-- file; it is presumably supplied by the host environment (Iguana).
help.set({ input_function = rtf.toTxt, help_data = ToTxtHelp })

-- Export the module table.
return rtf
Description
A module for converting an RTF file to plain text.
Attachments
rtf.zip
Usage Details

The code contains an rtf.toTxt() function that takes an RTF document, and strips out all the formatting and non-text objects to produce plain text.

How to use rtf.lua:

  • Create a new shared module called “rtf” and paste in the code above
  • Add local rtf = require 'rtf' at the top of the main module
  • Test using the attached sample.rtf, or your own rtf file

Example code for main:

local rtf = require 'rtf'

-- Simple example: reads sample.rtf and traces its plain-text content.
function main()
   -- Open the file explicitly so it can be closed again; assert raises
   -- io.open's error message if the file cannot be opened.
   local F = assert(io.open('sample.rtf', 'r'))
   -- Keep the content in a local so no global variable is created.
   local Rtf = F:read('*a')
   F:close()

   -- Now convert the content into plain text.
   local text = rtf.toTxt(Rtf)
   trace(text)
   -- Of course formatting is lost...
end
More Information
Converting RTF to plain text
Bookmark
  • Reviews
  • Related Listings
Filter
Sort by: Newest First
  • Oldest First
  • Rating
  • Helpfulness
Write a Review
Rating
Keyword
Filter
Sort by: Title
  • Newest First
  • Oldest First
  • Most Reviews
  • Highest Rated
Rating
iNTERFACEWARE
age.lua
Added by iNTERFACEWARE
Modules
This module calculates age from DOB, it returns years, months and partial years (i.e., 17, 3, 17.296272)
auth.lua
Added by iNTERFACEWARE
Modules
A module that does basic authentication for incoming web requests
batch.lua
Added by iNTERFACEWARE
Modules
A module to help processing batched HL7 messages
codemap.lua
Added by iNTERFACEWARE
Modules
This module is used to map one set of codes to another set of codes, or to validate code membership in a set
csv_parse.lua
Added by iNTERFACEWARE
Modules
A module for parsing well-formed CSV files.
custom_merge.lua
Added by iNTERFACEWARE
Modules
A customizable database merge method for Iguana 5.5.1 and up.
dateparse.lua
Added by iNTERFACEWARE
Modules
A fuzzy date/time parser that is very useful for automatically translating a wide variety of date/time formats.
dup.lua
Added by iNTERFACEWARE
Modules
Duplicate message filter.
edifact.lua
Added by iNTERFACEWARE
Modules
Convert EDI messages to HL7 format so you can process them like an HL7 message (and convert them back to EDI afterwards)
hl7.findSegment.lua
Added by iNTERFACEWARE
Modules
A utility for finding any HL7 segment in a parsed HL7 message node tree.
hl7.serialize.lua
Added by iNTERFACEWARE
Modules
Serializes an HL7 message using specified non-standard delimiters and/or escape characters
hl7.zsegment.lua
Added by iNTERFACEWARE
Modules
Generic Z segment parser. Parses Z segments without needing grammar definitions in the VMD file.
iguanaServer.lua
Added by iNTERFACEWARE
Modules
Provides programmatic access to various operations that can be performed on Iguana channels.
llp.lua
Added by iNTERFACEWARE
Modules
Allows you to use LLP connections from a Translator script
mime.lua
Added by iNTERFACEWARE
Modules
Sends MIME-encoded email attachments using the SMTP protocol. A wrapper around net.smtp.send.
resubmit.lua
Added by iNTERFACEWARE
Modules
Resubmit a logged message to an Iguana channel using the unique reference number (refmsgid).
retry.lua
Added by iNTERFACEWARE
Modules
A module for retrying operations which might periodically fail like database operations.
scheduler.lua
Added by iNTERFACEWARE
Modules
Schedule jobs to run at a specified time of day, very useful for batch processing
scrub.lua
Added by iNTERFACEWARE
Modules
The “scrub” module given below redacts sensitive information from HL7 messages.
sha1.lua
Added by iNTERFACEWARE
Modules
A pure Lua-based implementation of the popular SHA-1 hashing function.
Showing 1 - 20 of 31 results
«12»

Topics

  • expandGetting Started
  • expandAdministration
    • expandInstallation
    • expandLicensing
    • expandUpgrades
    • expandDeployment
    • expandConfiguration Management
      • expandCustom Configuration
    • expandBackup and Restore
    • expandSecurity
      • expandHIPAA Compliance
    • expandTroubleshooting
  • expandDeveloping Interfaces
    • expandArchitecture
    • expandInterfaces
      • expandHL7
      • expandDatabase
        • expandConnect
      • expandWeb Services
      • expandCDA
      • expandX12
      • expandOther Interfaces
      • expandUtilities
    • expandRepositories
      • expandBuiltin Repositories
        • expandIguana Upgrade
        • expandIguana Tutorials
        • expandIguana Tools
        • expandIguana Protocols
        • expandIguana Files
        • expandIguana Date/Time
        • expandIguana Webservices
        • expandIguana Excel
      • expandRemote Repositories
      • expandCS Team Repositories
        • expandIguana Channels
    • expandSample Code
      • expandModules
      • expandUsing built-in functions
      • expandWorking with XML
    • expandLua Programming
    • expandPerformance
  • expandFAQs and TIPs
    • expandFrequently Asked Questions
      • expandInstalls and Upgrades
      • expandWeb Services
      • expandConfiguration
      • expandChannels
      • expandTranslator
      • expandOther
      • expandDatabase
      • expandAdministration
      • expandLogs
      • expandChameleon
    • expandTips
      • expandChannels
      • expandChameleon
      • expandWeb Services
      • expandSecurity
      • expandProgramming
      • expandOther
      • expandAdministration
  • expandReference
    • expandIguana Enterprise and Professional
    • expandProgram Settings
    • expandChannel Settings
    • expandDashboard
    • expandChannels
    • expandTranslator
    • expandLogs
      • expandLog Encryption
    • expandHTTP API
    • expandCDA API
    • expandError Messages
    • expandChameleon
    • expandIguana Change Log

Other Links

  • Training Center
  • News & Announcements
  • iNTERFACEWARE Blog
  • Older Documentation (IGUANA v4 & Chameleon)
Copyright © iNTERFACEWARE Inc.