|
|
We have covered most of the shell-specific elements of a style analysis program, except for two components: the global constants set up at the top of the file, and the function analyze, which reports on the readability indices of a file. Here is a complete listing of the program. (See below for a commentary on the features that have not yet been covered.)
#-----------------------------------------------------
#
# rap -- Readability Analysis Program
#
# Purpose: provide readability analysis of texts to:
# Kincaid formula, ARI, Coleman-Liau Formula, Flesch
# Reading Ease Score. Also word count, sentence length,
# word length.
#
# Note that rap is _not_ as functional as style(CT),
# which is dictionary-driven; this is the outcome of
# a deliberate attempt to keep everything in a single
# shell script.
#
#------------- define program constants here ----------
#
# DEBUG may be preset in the environment; it defaults to "true".
# CLS, HILITE and NORMAL hold the terminal control sequences for
# clearing the screen and for starting/ending standout mode.
#
DEBUG=${DEBUG:-true}
CLS=`tput clear`
HILITE=`tput smso`
NORMAL=`tput rmso`
#
#----- define the lexical structure of a sentence -----
#
# All patterns below are extended regular expressions; they are
# handed to awk as dynamic regexps.  A literal period is written
# as the bracket expression [.] so that it survives both shell
# quoting and awk's string escape processing unchanged.
#
# a `word' primitive is any sequence of letters and digits.
#
WORD='[A-Za-z0-9]+'
#
# whitespace is what goes between real words in a sentence;
# it includes carriage returns so sentences can cross line
# boundaries.
#
WHITESPACE="[[:space:]]"
#
# an initial -- one or two letters followed by a period --
# is defined so we can tell that it is not a short sentence.
# (Otherwise Ph.D. would be counted as two sentences.)
# The letters-plus-period group may repeat so that chained
# initials such as "Ph.D." are matched as a single unit.
#
INITIAL="($WHITESPACE|[.])(([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9])[.])+"
#
# syllabic consonants; consonants including letter pairs:
# (lower case only)
#
CONS="[bcdfghjklmnpqrstvwxyz]|ll|ght|qu|([wstgpc]h)|sch"
#
# syllabic vowels; include the ly suffix (lower case only)
#
VOWL="[aeiou]+|ly"
#
# definition of a syllable (after Webster's Collegiate Dictionary):
# a consonant, then a consonant or a run of vowels, then a consonant.
#
SYL="(${CONS})\
((${CONS})|((${VOWL})+))\
(${CONS})"
#
# Finally, a sentence consists of (optionally) repeated
# sequences of one word followed by zero or more
# whitespaces, terminated by a period.
#
SENT="($WORD($WHITESPACE)*)+[.]"
#
#---------- initialize some local variables -----------
#
# help/verbose/record/batch/file are option flags ("yes" when set);
# fname is the file to analyze (a single blank means "none chosen");
# log is the current logging state and next_log_state is the state
# offered by the main menu; LOGFILE is named after this process id.
#
SCRIPT=$0
help='no' ; verbose=' ' ; record=' '
next_log_state='ON'; log='OFF' ; batch=' '
file=' ' ; fname=' ' ; LOGFILE=$$.log
#
#--------------- define program traps here ------------
#
# On hangup, interrupt, quit or terminate, prompt for a keypress
# instead of dying.
#
trap "strike_any_key" 1 2 3 15
#
#----------------- useful subroutines -----------------
#
# getc VAR -- read exactly one character from the terminal, without
# waiting for a newline, and store it in the variable named VAR.
#
getc ()
{
    # Remember the current terminal settings so they can be restored
    # exactly; fall back to "stty cooked" if they cannot be saved.
    saved_tty=`stty -g 2>/dev/null`
    stty raw
    tmp=`dd bs=1 count=1 2>/dev/null`
    if [ -n "$saved_tty" ]
    then
        stty "$saved_tty"
    else
        stty cooked
    fi
    eval $1='$tmp'
}
#
#-----------------------------------------------------
#
# toggle_logging -- flip report logging between ON and OFF and set
# next_log_state to the opposite value, which the main menu displays.
# Both values are derived from the current state of $log, so the pair
# cannot drift out of step even if next_log_state was set to an
# unexpected value.
#
toggle_logging ()
{
    case $log in
    ON) log=OFF
        next_log_state=ON ;;
    *)  log=ON
        next_log_state=OFF ;;
    esac
}
#
#-----------------------------------------------------
#
# get_fname -- prompt for a filename; an empty reply keeps the
# current value of fname.
#
get_fname ()
{
    echo "Enter a filename: \c"
    read newfname
    fname=${newfname:-${fname}}
}
#
#------------------------------------------------------
#
# strike_any_key -- pause until a key is pressed, then clear the screen.
#
strike_any_key()
{
    echo '
    strike any key to continue ...\c'
    getc junk
    echo $CLS
}
#
#-----------------------------------------------------
#
# change_dir -- prompt for a directory and change to it; an empty
# reply means the current directory.  Success is reported only when
# the cd actually worked.
#
change_dir ()
{
    echo "Enter a directory: \c"
    read newdir
    newdir=${newdir:-`pwd`}
    if cd "$newdir"
    then
        echo "Directory set to: $newdir"
    else
        echo "Cannot change directory to: $newdir"
    fi
}
#
#-----------------------------------------------------
#
# _help -- print the usage summary.
#
_help()
{
    echo "

    Readability Analysis Program

    A shell/awk demo to determine the readability grade of texts

    Usage:

        Either invoke with no options for full menu-driven
        activity, or use the following flags:

        -[h|H]    prints this help
        -l        cause output to be logged to a file
        -f file   enter the name of the file to check
        -b        run in batch mode (no menus)
    "
}
#
#---------- define the menu handler functions here
#------------------------------------------------------
#
# get_file -- file-selection submenu.  Loops until 'q' is typed,
# letting the user enter a filename, list the current directory, or
# change directory.  Unrecognized keys simply redraw the menu.
#
get_file()
{
    while :
    do
        echo $CLS
        echo "

        ${HILITE}Select a file${NORMAL}

        Current file is: [${HILITE} $fname ${NORMAL}]

        Type the letter corresponding to your current task:

        [space]  Enter a filename or pattern to use
        l        List the current directory
        c        Change current directory
        q        quit back to main menu


        =======>\c"
        getc char
        case $char in
        ' ') get_fname ;;
        'l') ls | ${PAGER:-more} ;;
        'c') change_dir ;;
        'q') break ;;
        *)   continue ;;
        esac
        strike_any_key
    done
}
#
#------------------------------------------------------
#
# analyze -- compute and report the readability indices of $fname.
#
# Reads:   fname, log, LOGFILE, WHITESPACE, SENT, INITIAL, SYL
# Prints:  word, syllable and sentence counts on standard output and
#          the four indices (ARI, Kincaid, Coleman-Liau, Flesch
#          Reading Ease) on /dev/tty; when logging is ON the indices
#          are also written to $LOGFILE.
# Returns: 1 if no file has been chosen, the file is unreadable, or
#          it contains no words or sentences; otherwise 0.
#
analyze()
{
    # fname is quoted: its "nothing chosen" value is a single blank,
    # which would vanish from an unquoted test.
    if [ -z "$fname" ] || [ "$fname" = " " ]
    then
        echo "

        You must specify a filename first
        "
        strike_any_key
        return 1
    fi
    if [ ! -r "$fname" ]
    then
        echo "

        Cannot read file: $fname
        "
        return 1
    fi
    wordcount=`wc -w < "$fname"`
    lines=`wc -l < "$fname"`
    # Strip all whitespace within each line and count the remaining
    # characters; the newlines that survive are subtracted below.
    nonwhitespace=`sed -e "s/${WHITESPACE}//g" < "$fname" | wc -c`
    # Count sentences.  Input lines are accumulated in "target" so a
    # sentence may span lines; initials are deleted first so they are
    # not mistaken for sentence ends; each sentence found is replaced
    # by a marker and everything up to and including the marker is
    # then discarded, leaving only the unfinished tail for next time.
    sentences=`awk -v sent="$SENT" -v init="$INITIAL" '
        BEGIN { sentences = 0
                target = ""
                marker = "+X+"
              }
              { target = target " " $0
                initials = gsub(init, "", target)
                hit = gsub(sent, marker, target)
                sentences += hit
                if (hit != 0) {
                    for (i = 0; i < hit; i++) {
                        found = index(target, marker)
                        target = substr(target, found + 3)
                    } # end for
                } # end if
                hit = 0
              }
        END   { print sentences }
    ' < "$fname"`
    letters=`expr $nonwhitespace - $lines`
    # Count syllables: the number of matches of the syllable pattern
    # on each line, summed over the file.
    sylcount=`awk -v syllable="$SYL" '
        BEGIN { sylcount = 0 }
              { target = $0
                sylcount += gsub(syllable, "", target)
              }
        END   { print sylcount }
    ' < "$fname"`
    echo "

    Number of words:     $wordcount
    Number of syllables: $sylcount
    Number of sentences: $sentences

    "
    # Every formula divides by the word count or the sentence count.
    if [ ${wordcount:-0} -eq 0 ] || [ ${sentences:-0} -eq 0 ]
    then
        echo "

        Cannot compute readability indices: no words or sentences found
        "
        return 1
    fi
    export letters wordcount sentences sylcount
    # bc defaults to scale 0 (integer division), so scale is set
    # explicitly.  Here-document text must start in column one.
    ARI=`bc << %%
scale = 2
l = ($letters / $wordcount)
w = ($wordcount / $sentences)
4.71 * l + 0.5 * w - 21.43
%%
`
    Kincaid=`bc << %%
scale = 2
w = ($wordcount / $sentences)
s = ($sylcount / $wordcount)
11.8 * s + 0.39 * w - 15.59
%%
`
    CLF=`bc << %%
scale = 2
l = ($letters / $wordcount)
s = ($sentences / ($wordcount / 100))
5.89 * l - 0.3 * s - 15.8
%%
`
    Flesch=`bc << %%
scale = 2
w = ($wordcount / $sentences)
s = ($sylcount / $wordcount)
206.835 - 84.6 * s - 1.015 * w
%%
`
    if [ "$log" = "ON" ]
    then
        echo "
ARI = $ARI
Kincaid= $Kincaid
Coleman-Liau = $CLF
Flesch Reading Ease = $Flesch" > $LOGFILE
    fi
    echo "ARI = $ARI
Kincaid= $Kincaid
Coleman-Liau = $CLF
Flesch Reading Ease = $Flesch" > /dev/tty
}
#
#=========== THIS IS WHERE THE PROGRAM BEGINS =========
#
#
#---------- parse the command line---------------------
#
# getopts sets result to "?" for an unrecognized option; that case
# falls through to the help text.
#
while getopts hHvlbf: result
do
    case $result in
    h|H) help="yes" ;;
    v)   verbose="yes" ;;
    l)   record="yes"
         next_log_state=OFF
         log=ON ;;
    b)   batch="yes" ;;
    f)   file="yes"
         fname=${OPTARG:-" "} ;;
    \?)  help="yes" ;;
    esac
done
if [ "$help" = "yes" ]
then
    _help
    exit 1
fi
# batch is quoted: its unset value is a single blank.
if [ "$batch" = "yes" ]
then
    analyze
    exit 0
fi
#
#---------- enter the mainloop ------------------------
#
while :
do
    echo $CLS
    echo "

    ${HILITE}Readability Analysis Program${NORMAL}

    Type the letter corresponding to your current task:

    f    Select files to analyze [now ${HILITE}$fname${NORMAL} ]
    p    Perform analyses
    l    switch ${next_log_state} report logging [now ${HILITE}$log${NORMAL}]
    q    quit program


    =======>\c"
    getc char
    case $char in
    'f') get_file ;;
    'p') analyze
         strike_any_key ;;
    'l') toggle_logging ;;
    'q') break ;;
    *)   continue ;;
    esac
done
clear
exit 0
The variable definitions from lines 17 to 65 set up some constants for screen clearing and highlighting, initialize variables for use in the script, and define some extended regular expressions, as explained in ``Regular expressions'', that are used later to scan the target file for initials, sentences, and syllables. The mechanism used to conduct the scan is a pair of scripts written in the awk programming language (explained in ``Using awk'') that identify the number of sentences in a file, and the number of syllables in the file. These scripts lie between lines 190 and 217; they are explained in detail in ``Spanning multiple lines''.