• Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Forums
  • Training
  • Create a Ticket
iNTERFACEWARE Help Center
  • Iguana 6
  • Previous Versions
  • API
  • Sample Code
  • Forums
  • Training
  • Create a Ticket

Code Repository

Home›Code Repository›rtf.lua
Modules

rtf.lua

Verified Featured
Added by iNTERFACEWARE

A module for converting an RTF file to plain text.

Source Code
local rtf={}
 
-- RTF module - used to convert RTF document into plain text.
-- http://help.interfaceware.com/code/details/rtf-lua
 
-- A simple set implementation in Lua
-- Builds a membership table from an array: every element of `list`
-- becomes a key whose value is true, so `set[x]` answers "is x a member?".
local function Set (list)
  local lookup = {}
  for _, item in ipairs(list) do
    lookup[item] = true
  end
  return lookup
end
 
-- Control words that switch the current group's destination to IGNORE
-- (see doControl); text inside such a group is dropped from the output.
local IgnoreSet = Set { 'info', 'fonttbl', 'colortbl', 'stylesheet', '*' }

-- Parser working state. All of it is module-private: every variable the
-- state machine assigns is declared here so that nothing leaks into the
-- global environment.
local Out = '';          -- plain text collected so far
local StateStack = {};   -- saved destinations, one entry per open '{' group
local State;             -- current state of the state machine
local Dest;              -- current character destination (USE or IGNORE)
local Token;             -- control word currently being read
local Arg;               -- numeric argument of the current control word
local EscapedChar;       -- hex digits collected after \'
 
-- States of the state machine
local PLAINTEXT, CONTROL, ARGUMENT, BACKSLASH, ESCAPED_CHAR = 1, 2, 3, 4, 5

-- Character destinations
local USE, IGNORE = 0, 1

-- Set once the "unexpected character" warning has been printed, so the
-- warning appears only the first time.
local UnexpectedCharFound = false
 
-- Sets the current character destination.
-- D is expected to be one of the destination constants (USE or IGNORE).
-- putChar consults the stored value and discards characters while it is
-- IGNORE.
local function setDest (D)
   Dest = D    
end
 
-- Saves the current destination on top of StateStack. Called when a '{'
-- opens a group, so that popState can restore it at the matching '}'.
local function pushState ()
   local Saved = { Dest = Dest }
   StateStack[#StateStack + 1] = Saved
end
 
-- Restores the destination saved by the matching pushState. Called when a
-- '}' closes a group.
-- If the stack is empty (the input has a '}' without a matching '{'), the
-- current destination is left unchanged instead of raising an
-- "attempt to index a nil value" error.
local function popState ()
   local Saved = table.remove (StateStack)
   if Saved == nil then
      return
   end
   setDest (Saved.Dest)
end
 
-- Appends character C to the output unless the current destination is
-- IGNORE. A carriage return is written as a newline.
-- B (the byte value of C) is accepted for the callers' benefit but unused.
local function putChar (C, B)
   if Dest == IGNORE then
      return
   end
   if C == '\r' then
      Out = Out..'\n'
   else
      Out = Out..C
   end
end
 
-- Returns true when C contains a letter (Lua character class %a).
local function isAlpha (C)
   if string.find (C, "%a") then
      return true
   end
   return false
end
 
-- Returns true when C contains a decimal digit (Lua character class %d).
local function isDigit (C)
   if string.find (C, "%d") then
      return true
   end
   return false
end
 
-- Returns true when C contains a whitespace character (Lua class %s).
local function isSpace (C)
   if string.find (C, "%s") then
      return true
   end
   return false
end
 
-- Acts on a completed RTF control word.
-- T is the control word (token) without the leading backslash.
-- A is its numeric argument, if any; it is currently not used.
-- 'par' emits a newline; any word listed in IgnoreSet switches the
-- destination to IGNORE. Every other control word is dropped silently.
local function doControl (T, A)
   if T == 'par' then
      putChar ('\n')
      return
   end
   if IgnoreSet[T] then
      setDest (IGNORE)
   end
end
 
-- Handles a character in plain-text context: a backslash starts a control
-- sequence, braces open/close a group, anything else is output.
-- C is the character, B its byte value.
-- When CheckSpace is true, C is the character that terminated a control
-- word; if it is whitespace it is only a delimiter and is not output.
-- Defined once at file level so it is not re-created for every character.
local function nextState (C, B, CheckSpace)
   if C == '\\' then
      State = BACKSLASH
   elseif C == '{' then
      pushState ()
   elseif C == '}' then
      popState ()
   elseif not CheckSpace or not isSpace (C) then
      putChar (C, B)
   end
end

-- Feeds one character into the state machine.
-- C is the character as a one-character string, B is its byte value.
local function feedChar (C, B)
   if State == PLAINTEXT then
      nextState (C, B, false)
   elseif State == BACKSLASH then
      if C == '\\' or C == '{' or C == '}' then
         -- Escaped literal backslash or brace
         putChar (C)
         State = PLAINTEXT
      elseif isAlpha (C) or C == '*' or C == '-' or C == '|' then
         State = CONTROL
         Token = C
         -- Forget the argument of the previous control word so it is not
         -- passed along with this one.
         Arg = nil
      elseif C == "'" then
         -- \'hh : a character given as two hex digits
         State = ESCAPED_CHAR
         EscapedChar = ''
      elseif C == '~' then
         -- \~ is output as an ordinary space
         putChar (' ')
         State = PLAINTEXT
      else
         if (UnexpectedCharFound ~= true) then
            print ('Exception: unxepected '..C..' after \\')
            UnexpectedCharFound = true
         end
         -- Drop the unknown control symbol and carry on with normal text;
         -- staying in BACKSLASH would misread the following characters.
         State = PLAINTEXT
      end
   elseif State == ESCAPED_CHAR then
      EscapedChar = EscapedChar..C
      if #EscapedChar == 2 then
         local Code = tonumber (EscapedChar, 16)
         -- Skip malformed escapes instead of crashing in string.char
         if Code and Code >= 0 and Code <= 255 then
            putChar (string.char (Code))
         end
         State = PLAINTEXT
      end
   elseif State == CONTROL then
      if isAlpha (C) then
         Token = Token..C
      elseif isDigit (C) or C == '-' then
         State = ARGUMENT
         Arg = C
      else
         -- C ends the control word; process it, then treat C itself
         doControl (Token, Arg)
         State = PLAINTEXT
         nextState (C, B, true)
      end
   elseif State == ARGUMENT then
      if isDigit (C) then
         Arg = Arg .. C
      else
         -- C ends the argument; process the control word, then treat C
         State = PLAINTEXT
         doControl (Token, Arg)
         nextState (C, B, true)
      end
   end
end
 
-- 
-- Public API
-- 
 
-- Given an RTF document as a Lua string (Data), return the text
-- portion of the document.
-- The parser state is reset on every call, so the function can be called
-- repeatedly. Line breaks in the raw data are stripped before parsing;
-- newlines in the result come from \par control words (see doControl).
function rtf.toTxt(Data)
   Out = ''
   StateStack = { }
   State = PLAINTEXT
   setDest (USE)

   -- Assigning to a single variable keeps only the first result of gsub
   -- (the new string) and drops the substitution count.
   Data = string.gsub (Data, '[\r\n]', '')

   for i = 1, #Data do
      local B = string.byte (Data, i)
      feedChar (string.char (B), B)
   end

   -- A control word is normally dispatched when the character after it
   -- arrives. If the data ends right after one, dispatch it here so a
   -- trailing \par is not lost.
   if State == CONTROL or State == ARGUMENT then
      doControl (Token, Arg)
      State = PLAINTEXT
   end

   return Out
end

-- Help record for rtf.toTxt: title, usage, description, return value,
-- parameters, an example and related links. The strings below are the
-- help text itself.
local rtf_toTxt = {
   Title="rtf.toTxt";
   Usage="rtf.toTxt(rtf)",
   SummaryLine="Converts an RTF to plain text.",
   Desc=[[Converts an RTF to plain text removing all formatting in the process.
   <p>The rtf.toTxt() function takes an RTF document, and strips out all the 
   formatting and non-text objects to produce plain text.
   ]];
   ["Returns"] = {
      {Desc="RTF as plain with all formatting removed  <u>string</u>."},
   };
   ParameterTable= true,
   Parameters= {
      {rtf= {Desc='A string containing RTF data <u>string</u>.'}},
   };
   Examples={
      [[   -- Read an rtf file into the Content string variable
   local FileName = iguana.project.root() ..'/'..iguana.project.guid()..'/sample.rtf'
   local F = io.open(FileName,'r')
   local Content = F:read('*a')
   F:close()
   
   -- Now convert the content into plain text.
   local Text = rtf.toTxt(Content)
   trace(Text)
   -- Of course formatting is lost...
   ]],
   };
   SeeAlso={
      {
         Title="rtf.lua - in our code repository",
         Link="http://help.interfaceware.com/code/details/rtf-lua"
      },
      {
         Title="RTF conversion example",
         Link="http://help.interfaceware.com/v6/rtf-conversion-example"
      }
   }
}

-- Attach the help record to rtf.toTxt.
-- NOTE(review): `help` is not defined in this file; presumably it is the
-- help API provided by the Iguana environment - confirm.
help.set{input_function=rtf.toTxt, help_data=rtf_toTxt}

 
return rtf
Description
A module for converting an RTF file to plain text.
Attachments
rtf.zip
Usage Details

The code contains an rtf.toTxt() function that takes an RTF document, and strips out all the formatting and non-text objects to produce plain text.

How to use rtf.lua:

  • Create a new shared module called “rtf” and paste in the code above
  • Add local rtf = require 'rtf' at the top of the main module
  • Test using the attached sample.rtf, or your own rtf file

Example code for main:

local rtf = require 'rtf'
-- Simple example: read an RTF file and convert it into plain text.
function main()
   -- Open the file explicitly (instead of redirecting the default input
   -- with io.input) so it can be closed again; assert reports a missing
   -- file with the reason given by io.open.
   local F = assert(io.open('sample.rtf', 'r'))
   local Rtf = F:read('*a')
   F:close()

   -- Now convert the content into plain text.
   local text=rtf.toTxt(Rtf)
   trace(text)
   -- Of course formatting is lost...
end
More Information
Converting RTF to plain text
Bookmark
  • Reviews
  • Related Listings
Filter
Sort by: Newest First
  • Oldest First
  • Rating
  • Helpfulness
Write a Review
Rating
Keyword
Filter
Sort by: Title
  • Newest First
  • Oldest First
  • Most Reviews
  • Highest Rated
Rating
iNTERFACEWARE
sha1.lua
Added by iNTERFACEWARE
Modules
A pure Lua-based implementation of the popular SHA-1 hashing function.
sqlite.lua
Added by iNTERFACEWARE
Modules
A module to create a SQLite database and generate the tables specified in a VMD
store.lua
Added by iNTERFACEWARE
Modules
The "original" store module: Allows you to store key/value pairs in a persistent storage mechanism. We recommend using the new store2 module instead.
store2.lua
Added by iNTERFACEWARE
Modules
Provides a simple interface to store key/value pairs in a persistent storage mechanism.
stream.lua
Added by iNTERFACEWARE
Modules
This module performs basic stream processing, read, write, save to file, convert to text etc.
stringutil.lua
Added by iNTERFACEWARE
Modules
A library of helpful extensions to the standard Lua string library.
test_all.lua
Added by iNTERFACEWARE
Modules
Test a script against all the sample messages loaded for the component
throttle.lua
Added by iNTERFACEWARE
Modules
Throttle a process during peak hours, by slowing down the code.
throttleDB.lua
Added by iNTERFACEWARE
Modules
Throttle database access by reducing the number of inserts during peak hours
urlcode.lua
Added by iNTERFACEWARE
Modules
A module for parsing URL encoded GET/POST sequences
validate.lua
Added by iNTERFACEWARE
Modules
A template module for testing HL7 message conformance, you will need to extend it to match your requirements
xml.lua
Added by iNTERFACEWARE
Modules
A collection of helpful XML node functions.
Showing 21 - 32 of 32 results
«12»

Topics

  • expandGetting Started
  • expandAdministration
    • expandInstallation
    • expandLicensing
    • expandUpgrades
    • expandDeployment
    • expandConfiguration Management
      • expandCustom Configuration
    • expandBackup and Restore
    • expandSecurity
      • expandHIPAA Compliance
    • expandTroubleshooting
  • expandDeveloping Interfaces
    • expandArchitecture
    • expandInterfaces
      • expandHL7
      • expandDatabase
        • expandConnect
      • expandWeb Services
      • expandCDA
      • expandX12
      • expandOther Interfaces
      • expandUtilities
    • expandRepositories
      • expandBuiltin Repositories
        • expandIguana Upgrade
        • expandIguana Tutorials
        • expandIguana Tools
        • expandIguana Protocols
        • expandIguana Files
        • expandIguana Date/Time
        • expandIguana Webservices
        • expandIguana Excel
      • expandRemote Repositories
      • expandCS Team Repositories
        • expandIguana Channels
    • expandSample Code
      • expandModules
      • expandUsing built-in functions
      • expandWorking with XML
    • expandLua Programming
    • expandPerformance
  • expandFAQs and TIPs
    • expandFrequently Asked Questions
      • expandInstalls and Upgrades
      • expandWeb Services
      • expandConfiguration
      • expandChannels
      • expandTranslator
      • expandOther
      • expandDatabase
      • expandAdministration
      • expandLogs
      • expandChameleon
    • expandTips
      • expandChannels
      • expandChameleon
      • expandWeb Services
      • expandSecurity
      • expandProgramming
      • expandOther
      • expandAdministration
  • expandReference
    • expandIguana Enterprise and Professional
    • expandProgram Settings
    • expandChannel Settings
    • expandDashboard
    • expandChannels
    • expandTranslator
    • expandLogs
      • expandLog Encryption
    • expandHTTP API
    • expandCDA API
    • expandError Messages
    • expandChameleon
    • expandIguana Change Log

Other Links

  • Training Center
  • News & Announcements
  • iNTERFACEWARE Blog
  • Older Documentation (IGUANA v4 & Chameleon)
Copyright © iNTERFACEWARE Inc.