C text formatter: character count is off
So I have an assignment to format text according to a set of rules and eventually print out the number of characters in the string (including \n and spaces, i.e. anything but the \0 at the end of the string).
Basically, a valid input ends with a dot, but I think I am left with a few extra whitespace characters after the final dot.
I have tried several approaches, such as a loop that replaces spaces with \0 starting from the end of the string.
Nothing seems to work, though...
EDIT The requirements are:
- to convert double-dots(..) to a new line
- delete multiple spaces leaving only one,
- making sure that there isn't a space prior to a comma or a dot
- making sure that there is single space after a comma or dot.
- not changing the original content between apostrophes.
- and validating that there are Capital letters only in the correct places (new line\paragraph).
We are required to write all of the code in the main function (unfortunately), and my code usually gets the character count wrong by 1-2 extra characters (probably due to extra spaces after the last dot).
this is an example of input that my code fails at counting characters
the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .
/*
 * Interactive text cleaner (the asker's original attempt).
 *
 * Each round reads one line into tin, rewrites it in place with a series of
 * independent passes (".." -> newline, collapse spaces, fix spacing around
 * commas and dots), builds the case-corrected result in tout, and prints it
 * together with character / word / sentence / paragraph counts.
 * The loop repeats while the user answers 'y'.
 *
 * NOTE(review): the closing brace of main is not part of this excerpt.
 */
int main() {
char ans;
printf("*************** Welcome to the text cleaner ***************\n\n");
do
{
// j counts how many characters the space-collapsing pass removed
int length, i, j = 0;
int word, sentence, para, space;
// tin: working input buffer, tout: final output; dex/pos: search cursors
char tin[601], tout[601], * dex, * pos;
printf("\nPlease enter text:\n");
gets_s(tin, 600);
length = strlen(tin);
dex = strchr(tin, '.'); // pass 1: turn the second dot of ".." into a newline (the first dot is kept)
while (dex != NULL)
{
if (tin[dex - tin + 1] == '.') {
tin[dex - tin + 1] = '\n';
}
dex = strchr(dex + 1, '.');
}
length = strlen(tin);
dex = strchr(tin, ' '); // pass 2: collapse runs of spaces into a single space
while (dex != NULL)
{
while (dex != NULL && tin[dex - tin + 1] == ' ')
{
// shift the tail one position left over the current space; the
// terminator is not moved, so the last character ends up duplicated
for (i = dex - tin + 1; i < strlen(tin); i++)
{
tin[i - 1] = tin[i];
}
// dex still points at a space, so this returns dex itself
dex = strchr(dex, ' ');
j++;
}
dex = strchr(dex + 1, ' ');
}
// cut off the j duplicated characters left behind by the shifts above
tin[length - j] = '\0';
j = 0;
dex = strchr(tin, '\n');
// pass 3: delete the space at the beginning of a new row.
// NOTE(review): the spacing test is part of the loop condition, so the
// pass stops at the first newline that is NOT followed by a space.
while (dex != NULL && tin[dex-tin+1] == ' ')
{
// this shift copies the terminator too, so the string really shrinks
for (i = dex - tin + 1;i < strlen(tin);i++) {
tin[i] = tin[i + 1];
}
dex = strchr(dex + 1, '\n');
}
dex = strchr(tin, ','); // pass 4: delete the space before a comma
// NOTE(review): stops at the first comma that has no space before it, and
// reads tin[-1] when the comma is the very first character.
while (dex != NULL && tin[dex - tin - 1] == ' ')
{
for (i = dex - tin - 1; i < strlen(tin); i++)
{
tin[i] = tin[i+1];
}
dex = strchr(dex+1, ',');
}
dex = strchr(tin, '.'); // pass 5: delete the space before a dot (same shape and caveats as pass 4)
while (dex != NULL && tin[dex - tin - 1] == ' ')
{
for (i = dex - tin - 1; i < strlen(tin); i++)
{
tin[i] = tin[i+1];
}
dex = strchr(dex + 1, '.');
}
dex = strchr(tin, ','); // pass 6: add a space after a comma
// NOTE(review): stops at the first comma already followed by a space; if a
// comma were directly followed by '\n', neither branch advances dex and the
// loop would never terminate.
while (dex != NULL && tin[dex - tin + 1] != ' ')
{
if (tin[dex - tin + 1] != '\n')
{
// place the new terminator one position further right, then shift the
// tail right by one and drop a space just after the comma
tin[strlen(tin) + 1] = '\0';
for (i = strlen(tin); i > dex - tin; i--)
{
if (i == dex - tin + 1)
{
tin[i] = ' ';
}
else
{
tin[i] = tin[i - 1];
}
}
dex = strchr(dex + 1, ',');
}
}
dex = strchr(tin, '.'); // pass 7: add a space after a dot
// NOTE(review): a dot that is the LAST character is followed by '\0', which
// is neither ' ' nor '\n', so a space is appended after the final dot. That
// trailing space is later included in strlen(tout) -- a likely source of the
// extra character in the reported count.
while (dex != NULL && tin[dex - tin + 1] != ' ')
{
tin[strlen(tin) + 1] = '\0';
if (tin[dex - tin + 1] == '\n')
{
// dot + newline (from "..") needs no space
dex = strchr(dex + 1, '.');
}
else
{
for (i = strlen(tin); i > dex - tin; i--)
{
if (i == dex - tin + 1)
{
tin[i] = ' ';
}
else
{
tin[i] = tin[i - 1];
}
}
dex = strchr(dex + 1, '.');
}
}
strcpy_s(tout, sizeof(tout), tin);
_strlwr_s(tout,sizeof(tout)); // tout = lowercased copy of the cleaned input
dex = strchr(tin, '"');
if (dex != NULL) {
pos = strchr(dex + 1, '"');
// NOTE(review): an unmatched opening quote leaves pos == NULL, which is
// then used in pointer arithmetic below.
while (dex != NULL)
{
// copy the quoted span [dex, pos) back from the untouched tin
for (i = dex - tin; i < pos - tin; i++) {
tout[i] = tin[i];
}
dex = strchr(pos + 1, '"');
if (dex)
{
pos = strchr(dex + 1, '"');
}
} // making sure that the letters inside the quotes haven't been lowercased
}
// tin becomes an all-uppercase copy, used as the source of capital letters
_strupr_s(tin, sizeof(tin));
dex = strchr(tout, '.');
pos = strchr(tin, '.');
// walk the dots of both strings in parallel and copy the character two
// positions after each dot (the one following ". " or ".\n") from the
// uppercased tin into tout
while (dex != NULL && pos != NULL)
{
tout[dex - tout + 2] = tin[pos - tin + 2];
dex = strchr(dex + 1, '.');
pos = strchr(pos + 1, '.');
}
//CAPSLOCK
dex = strchr(tout, '.'); // delete the space before dots (again, this time on tout and for every dot)
while (dex != NULL)
{
if (tout[dex - tout - 1] == ' ')
{
for (i = dex - tout - 1; i < strlen(tout); i++)
{
tout[i] = tout[i+1];
}
}
dex = strchr(dex + 1, '.');
}
if (tout[0] == ' ') {
for (i = 0 ;i < strlen(tout); i++) {
tout[i] = tout[i + 1];
}
}// handling a single space at the beginning of the string
if (tout[0] >= 'a' && tout[0] <= 'z') {
tout[0] -= 32;
} // first letter is always capital
word = 0;
sentence = 0;
para = 1;
space = 0;
// the reported character count is simply the length of the final string
length = strlen(tout);
// words: every space that is not followed by another space
for (i = 0; tout[i] != '\0';i++)
{
if (tout[i] == ' ' && tout[i + 1] != ' ')
word++;
}
// sentences: number of dots
dex = strchr(tout, '.');
while (dex != NULL)
{
sentence++;
dex = strchr(dex + 1, '.');
}
// every newline starts a paragraph and also counts as one more word
dex = strchr(tout, '\n');
while (dex != NULL)
{
space++;
para++;
word++;
dex = strchr(dex + 1, '\n');
}
//dex = strchr(tout, '-');
//while (dex != NULL)
//{
// word++;
// dex = strchr(dex + 1, '-');
//}
printf_s("\nText after cleaning:\n------------------------------------------------------------------------------------------------\n");
printf_s("%s\n\n", tout);
printf_s("characters: %d | words: %d | sentences: %d | paragraphs: %d\n------------------------------------------------------------------------------------------------\n",length, word, sentence, para);
printf_s("\nIf you want to clean another string press (y): ");
scanf_s(" %c", &ans, 1);
if (ans == 'y')
{
// read the remainder of the answer line so the next round starts clean
gets_s(tin, 600);
}
} while (ans =='y');
Solution 1:[1]
As I mentioned in the top comments, this can be done in a single loop with state variables.
A few assumptions:
- Whenever we see `..` (which is converted to a newline), it starts a new paragraph
- What you called "apostrophe", I'm calling a double quote (as that's the only thing that made sense).
- Within quotes nothing is converted
- Quotes [themselves] are copied over (i.e. not stripped)
Unfortunately, I had to completely refactor the code. It is annotated. I realize you have to use only main. The extra functions are merely for debug, so they "don't count":
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Command-line switches (file scope, so they start out as 0).
int opt_d; // debug tracing; toggled by -d
int opt_quo; // copy the quote characters; toggled by -q, then inverted once in main
// dbgprt: printf-style trace that prints only when built with DEBUG and
// opt_d is set; otherwise it expands to an empty statement.
// The `_fmt...` form is the GNU named-variadic macro extension.
#if DEBUG
#define dbgprt(_fmt...) \
do { \
if (opt_d) \
printf(_fmt); \
} while (0)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
// COPYX: append _chr to the output and advance the output pointer.
// Expects a `dst` pointer to be in scope at the point of use.
#define COPYX(_chr) \
do { \
*dst++ = _chr; \
dbgprt("COPY %2.2X/%s\n",_chr,showchr(_chr)); \
} while (0)
// COPY: append the current input character (`chr` in the caller's scope).
#define COPY \
COPYX(chr)
// WHITEOUT: if a separating space is pending (`white`), emit exactly one
// space, clear the flag and bump the word counter (`ctr_word`).
// Does nothing when no space is pending.
#define WHITEOUT \
do { \
if (! white) \
break; \
COPYX(' '); \
white = 0; \
ctr_word += 1; \
} while (0)
/*
 * Return a printable representation of one character.
 *
 * Printable ASCII (0x20..0x7E) is returned as the character itself; anything
 * else is returned as "{XX}" with XX being the byte value in hex.
 *
 * chr may arrive sign-extended from a plain (signed) char, e.g. -23 for the
 * byte 0xE9.  Such values are folded back to the byte value so the result is
 * "{E9}" rather than "{FFFFFFE9}", which would not fit in the buffer.
 *
 * The result lives in a static buffer that is overwritten by the next call.
 */
const char *
showchr(int chr)
{
    static char buf[10];

    if ((chr >= 0x20) && (chr <= 0x7E)) {
        snprintf(buf,sizeof(buf),"%c",chr);
    }
    else {
        // undo sign extension of a negative char; keep other values as is
        unsigned int code = (chr < 0) ? (unsigned char) chr
                                      : (unsigned int) chr;

        // snprintf bounds the write even for unexpectedly large values
        snprintf(buf,sizeof(buf),"{%2.2X}",code);
    }

    return buf;
}
/*
 * Print a labelled, single-quoted dump of a string on stdout.
 *
 * buf - NUL-terminated string to show; every character goes through
 *       showchr(), so unprintable ones appear as "{XX}"
 * who - label printed in front of the dump
 */
void
showbuf(const char *buf,const char *who)
{
    const char *sep = "'";
    const char *cur = buf;

    printf("%s: %s",who,sep);

    while (*cur != 0) {
        int chr = *cur++;
        printf("%s",showchr(chr));
    }

    printf("%s\n",sep);
}
/*
 * Clean every line of a text file and print the result with its statistics.
 *
 * Usage: prog [-d] [-q] [file]
 *   -d     toggle debug tracing (opt_d)
 *   -q     toggle copying of the double-quote characters (opt_quo)
 *   file   input file; "inp.txt" when omitted
 *
 * Per line: ".." becomes a newline that starts a new paragraph, runs of
 * spaces collapse to one, a space in front of a dot is dropped and one is
 * ensured after it, text between double quotes is copied verbatim, and
 * letters are capitalised only at the start of a sentence.
 *
 * Returns 0 on success; exits with status 1 when the file cannot be opened.
 */
int
main(int argc,char **argv)
{
    char inp[1000];
    // The cleaned text can be LONGER than the input (a space is inserted
    // after a dot that is directly followed by text, e.g. "a.b" -> "A. B"),
    // so it must not be built on top of the unread input.  At most one extra
    // character is produced per input character, hence twice the size.
    char buf[2 * sizeof(inp)];
    const char *src;
    char *dst;

    --argc;
    ++argv;

    // leading "-x" options; the first non-option argument is the file name
    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        default:
            // unknown options are ignored
            break;
        }
    }

    // invert once so that "copy the quotes" is the default and -q disables it
    opt_quo = ! opt_quo;

    const char *file;
    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");
    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        // read from the pristine line, write into the separate output buffer
        src = inp;
        dst = buf;

        // state variables
        int quo = 0;                    // 1=inside a quoted string
        int white = 0;                  // 1=a separating space is pending
        int dot = 1;                    // 1=next letter starts a sentence

        // counters
        int ctr_sent = 0;
        int ctr_word = 0;
        int ctr_para = 1;

        for (int chr = *src++; chr != 0; chr = *src++) {
            dbgprt("LOOP %2.2X/%s quo=%d white=%d dot=%d word=%d sent=%d para=%d\n",
                chr,showchr(chr),quo,white,dot,
                ctr_word,ctr_sent,ctr_para);

            // got a quote
            if (chr == '"') {
                // flush a pending space before an opening quote
                if (! quo) {
                    WHITEOUT;
                }
                if (opt_quo) {
                    COPY;
                }
                quo = ! quo;
                continue;
            }

            // inside a quoted string everything is copied verbatim
            if (quo) {
                COPY;
                continue;
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                // a pending space is deliberately NOT flushed before the dot
                COPY;
                white = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                white = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                white = 1;
                continue;
            }

            // output accumulated whitespace
            WHITEOUT;

            // <ctype.h> functions require an unsigned char value (or EOF)
            if (isupper((unsigned char) chr)) {
                // uppercase is only allowed at the start of a sentence
                if (! dot)
                    chr = tolower((unsigned char) chr);
            }
            else if (islower((unsigned char) chr)) {
                // capitalise the first letter of a sentence
                if (dot)
                    chr = toupper((unsigned char) chr);
            }

            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;
            dot = 0;
        }

        *dst = 0;

        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}
Here is the program output:
inp: 'the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31
UPDATE:
Great, thank you! I've taken what you've done and implemented it in my code. Basically, the professor restricted us to use only the stdio.h and string.h libraries, so I couldn't use any other functions... – Nitai Dan
You're welcome!
I'm glad you were able to incorporate my code into yours. That's the best of all possible scenarios for learning.
I wasn't sure I had added enough annotation to make my algorithm clear, so I had cleaned it up a bit and was debating whether to post it or not. It is posted below.
Based on what you just said, I'm [still] not sure whether creating your own functions would have been allowed. As I said, debug functions would [probably] be okay if they don't change the algorithm.
Anyway, here is the updated code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Command-line switches (file scope, so they start out as 0).
int opt_d; // debug tracing; toggled by -d
int opt_quo; // copy the quote characters; toggled by -q, then inverted once in main
int opt_x; // non-zero: show a space as {20} in dumps; set by -x[N]
// _dbgprt: unconditional printf-style trace output.
// The `_fmt...` form is the GNU named-variadic macro extension.
#define _dbgprt(_fmt...) \
printf(_fmt)
// dbgprt: like _dbgprt, but only when built with DEBUG and opt_d is set;
// otherwise it expands to an empty statement.
#if DEBUG
#define dbgprt(_fmt...) \
do { \
if (opt_d) \
_dbgprt(_fmt); \
} while (0)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
// COPYX: append _chr to the output and advance `dst`.
// The DEBUG flavour also records that something was copied (`copy`) and
// traces the character whenever it differs from the input character just
// consumed (src[-1]).  Expects `dst` (and, under DEBUG, `copy` and `src`)
// to be in scope at the point of use.
#if DEBUG
#define COPYX(_chr) \
do { \
*dst++ = _chr; \
copy = 1; \
if (_chr != src[-1]) \
_dbgprt(" COPY %s\n",_showchr(_chr,1)); \
} while (0)
#else
#define COPYX(_chr) \
do { \
*dst++ = _chr; \
} while (0)
#endif
// COPY: append the current input character (`chr` in the caller's scope).
#define COPY \
COPYX(chr)
// SPACEOUT: if a separating space is pending (`spc`), clear the flag, emit
// exactly one space and increment the word counter (`ctr_word`).
// Does nothing when no space is pending.
#define SPACEOUT \
do { \
if (! spc) \
break; \
spc = 0; \
COPYX(' '); \
ctr_word += 1; \
} while (0)
/*
 * Return a printable representation of one character.
 *
 * chr  - character value; may be sign-extended from a plain (signed) char
 * xflg - non-zero: a space is also shown in hex, i.e. as "{20}"
 *
 * Printable ASCII is returned as the character itself; anything else is
 * returned as "{XX}" with XX being the byte value in hex.  A negative chr
 * (e.g. -23 for the byte 0xE9) is folded back to the byte value so the
 * result is "{E9}" rather than "{FFFFFFE9}", which would not fit in the
 * buffer.
 *
 * The result lives in a static buffer that is overwritten by the next call.
 */
const char *
_showchr(int chr,int xflg)
{
    static char buf[10];
    // lowest character that is shown literally
    int lo = xflg ? 0x21 : 0x20;

    if ((chr >= lo) && (chr <= 0x7E)) {
        snprintf(buf,sizeof(buf),"%c",chr);
    }
    else {
        // undo sign extension of a negative char; keep other values as is
        unsigned int code = (chr < 0) ? (unsigned char) chr
                                      : (unsigned int) chr;

        // snprintf bounds the write even for unexpectedly large values
        snprintf(buf,sizeof(buf),"{%2.2X}",code);
    }

    return buf;
}
/*
 * Printable representation of chr, with the "show space as hex" behaviour
 * taken from the global opt_x switch.  See _showchr() for the format; the
 * returned pointer refers to _showchr()'s static buffer.
 */
const char *
showchr(int chr)
{
    const char *text = _showchr(chr,opt_x);

    return text;
}
/*
 * Dump a string on stdout as:  who: '...'
 *
 * buf - NUL-terminated string; each character is rendered via showchr()
 * who - label that precedes the dump
 */
void
showbuf(const char *buf,const char *who)
{
    const char *sep = "'";
    int chr;

    printf("%s: %s",who,sep);

    for (;;) {
        chr = *buf++;
        if (chr == 0)
            break;
        printf("%s",showchr(chr));
    }

    printf("%s\n",sep);
}
/*
 * Trace one named value as part of a debug line.
 *
 * sym - label; when it starts with an uppercase letter it is treated as a
 *       counter and always printed as " <sym><val>"
 * val - value; for any other label only the label itself is printed, and
 *       only when val is non-zero (i.e. the flag is set)
 */
void
dbgint(const char *sym,int val)
{
    const int is_counter = isupper(sym[0]);

    // a flag that is not set produces no output at all
    if (! is_counter && ! val)
        return;

    _dbgprt(" %s",sym);

    if (is_counter)
        _dbgprt("%d",val);
}
/*
 * Clean every line of a text file and print the result with its statistics.
 *
 * Usage: prog [-d] [-q] [-x[N]] [file]
 *   -d     toggle debug tracing (opt_d)
 *   -q     toggle copying of the double-quote characters (opt_quo)
 *   -x[N]  set opt_x to N (1 when N is omitted): show spaces as {20} in dumps
 *   file   input file; "inp.txt" when omitted
 *
 * Per line: ".." becomes a newline that starts a new paragraph, runs of
 * spaces collapse to one, a space in front of a dot is dropped and one is
 * ensured after it, text between double quotes is copied verbatim, and the
 * first character of a sentence is uppercased while all others are
 * lowercased.
 *
 * Returns 0 on success; exits with status 1 when the file cannot be opened.
 */
int
main(int argc,char **argv)
{
    char inp[1000];
    // The cleaned text can be LONGER than the input (a space is inserted
    // after a dot that is directly followed by text, e.g. "i go.he goes."),
    // so it must not be built on top of the unread input.  At most one extra
    // character is produced per input character, hence twice the size.
    char buf[2 * sizeof(inp)];
    const char *src;
    char *dst;

    --argc;
    ++argv;

    // leading "-x" options; the first non-option argument is the file name
    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        cp += 2;
        switch (cp[-1]) {
        case 'd':
            opt_d = ! opt_d;
            break;
        case 'q':
            opt_quo = ! opt_quo;
            break;
        case 'x':
            opt_x = (*cp != 0) ? atoi(cp) : 1;
            break;
        default:
            // unknown options are ignored
            break;
        }
    }

    // invert once so that "copy the quotes" is the default and -q disables it
    opt_quo = ! opt_quo;

    const char *file;
    if (argc > 0)
        file = *argv;
    else
        file = "inp.txt";

    FILE *xfsrc = fopen(file,"r");
    if (xfsrc == NULL) {
        perror(file);
        exit(1);
    }

    while (fgets(inp,sizeof(inp),xfsrc) != NULL) {
        // read from the pristine line, write into the separate output buffer
        src = inp;
        dst = buf;

        // state variables
        int quo = 0;                    // 1=within quoted string
        int spc = 0;                    // 1=space seen
        int dot = 1;                    // 1=period/newline seen

        // counters
        int ctr_sent = 0;               // number of sentences
        int ctr_word = 0;               // number of words
        int ctr_para = 1;               // number of paragraphs

#if DEBUG
        int copy = 0;
        int ochr = 0;
#endif

        for (int chr = *src++; chr != 0; chr = *src++) {
#if DEBUG
            if (opt_d) {
                // show if we skipped the prior char (and it was _not_ a space)
                if ((! copy) && (ochr != ' '))
                    _dbgprt("SKIP\n");
                copy = 0;
                ochr = chr;

                _dbgprt("LOOP %s",showchr(chr));
                dbgint("W:",ctr_word);
                dbgint("S:",ctr_sent);
                dbgint("P:",ctr_para);
                dbgint("quo",quo);
                dbgint("spc",spc);
                dbgint("dot",dot);
                _dbgprt("\n");
            }
#endif

            // got a quote
            if (chr == '"') {
                // flush whitespace if starting a quoted string
                if (! quo) {
                    SPACEOUT;
                }

                // copy the quote
                if (opt_quo) {
                    COPY;
                }

                // flip the quote mode
                quo = ! quo;
                continue;
            }

            // if inside a quoted string, just copy out the char verbatim
            if (quo) {
                COPY;
                continue;
            }

            // got a dot
            if (chr == '.') {
                dot = 1;

                // double dot --> newline (new paragraph)
                if (*src == '.') {
                    COPYX('\n');
                    ++src;
                    ctr_para += 1;
                    continue;
                }

                // a pending space is deliberately NOT flushed before the dot
                COPY;

                // force whitespace mode (ensure space after dot)
                // (e.g.) change:
                //   i go.he goes.
                // into:
                //   i go. he goes.
                spc = 1;
                continue;
            }

            // from fgets, this can _only_ occur at the end of the buffer
            if (chr == '\n') {
                dot = 1;
                spc = 1;
                COPY;
                break;
            }

            // accumulate/skip over whitespace
            if (chr == ' ') {
                spc = 1;
                continue;
            }

            // output accumulated whitespace
            SPACEOUT;

            // convert case; <ctype.h> functions require an unsigned char
            // value (or EOF), so a possibly negative char is cast first
            if (dot)
                chr = toupper((unsigned char) chr);
            else
                chr = tolower((unsigned char) chr);

            // output the current character -- it's _not_ special
            COPY;

            // count sentences
            if (dot)
                ctr_sent += 1;

            // we're no longer at the start of a sentence
            dot = 0;
        }

        *dst = 0;

        // -x without a number only affects the debug trace of the first
        // line; the dumps below then show spaces literally
        if (opt_x == 1)
            opt_x = 0;

        showbuf(inp,"inp");
        showbuf(buf,"buf");

#if 0
        if (dot)
            ctr_word += 1;
#endif

        printf("TOTAL: length=%zu sentences=%d paragraphs=%d words=%d\n",
            strlen(buf),ctr_sent,ctr_para,ctr_word);
    }

    fclose(xfsrc);

    return 0;
}
Output with -d
:
SKIP
LOOP t W:0 S:0 P:1 dot
LOOP h W:0 S:1 P:1
LOOP e W:0 S:1 P:1
LOOP W:0 S:1 P:1
LOOP L W:0 S:1 P:1 spc
COPY {20}
LOOP A W:1 S:1 P:1
LOOP N W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP U W:1 S:1 P:1
LOOP A W:1 S:1 P:1
LOOP G W:1 S:1 P:1
LOOP E W:1 S:1 P:1
LOOP W:1 S:1 P:1
LOOP W:1 S:1 P:1 spc
LOOP " W:1 S:1 P:1 spc
COPY {20}
LOOP C W:2 S:1 P:1 quo
LOOP " W:2 S:1 P:1 quo
LOOP W:2 S:1 P:1
LOOP i W:2 S:1 P:1 spc
COPY {20}
LOOP s W:3 S:1 P:1
LOOP W:3 S:1 P:1
LOOP a W:3 S:1 P:1 spc
COPY {20}
LOOP W:4 S:1 P:1
LOOP p W:4 S:1 P:1 spc
COPY {20}
LOOP r W:5 S:1 P:1
LOOP o W:5 S:1 P:1
LOOP c W:5 S:1 P:1
LOOP e W:5 S:1 P:1
LOOP d W:5 S:1 P:1
LOOP u W:5 S:1 P:1
LOOP r W:5 S:1 P:1
LOOP a W:5 S:1 P:1
LOOP l W:5 S:1 P:1
LOOP W:5 S:1 P:1
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP W:5 S:1 P:1 spc
LOOP p W:5 S:1 P:1 spc
COPY {20}
LOOP r W:6 S:1 P:1
LOOP o W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP r W:6 S:1 P:1
LOOP a W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP m W:6 S:1 P:1
LOOP i W:6 S:1 P:1
LOOP n W:6 S:1 P:1
LOOP g W:6 S:1 P:1
LOOP W:6 S:1 P:1
LOOP l W:6 S:1 P:1 spc
COPY {20}
LOOP a W:7 S:1 P:1
LOOP n W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP u W:7 S:1 P:1
LOOP a W:7 S:1 P:1
LOOP g W:7 S:1 P:1
LOOP e W:7 S:1 P:1
LOOP W:7 S:1 P:1
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP W:7 S:1 P:1 spc
LOOP . W:7 S:1 P:1 spc
LOOP I W:7 S:1 P:1 spc dot
COPY {20}
LOOP t W:8 S:2 P:1
LOOP W:8 S:2 P:1
LOOP w W:8 S:2 P:1 spc
COPY {20}
LOOP a W:9 S:2 P:1
LOOP s W:9 S:2 P:1
LOOP W:9 S:2 P:1
LOOP i W:9 S:2 P:1 spc
COPY {20}
LOOP n W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP t W:10 S:2 P:1
LOOP i W:10 S:2 P:1
LOOP a W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP l W:10 S:2 P:1
LOOP y W:10 S:2 P:1
LOOP W:10 S:2 P:1
LOOP d W:10 S:2 P:1 spc
COPY {20}
LOOP e W:11 S:2 P:1
LOOP v W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP l W:11 S:2 P:1
LOOP o W:11 S:2 P:1
LOOP p W:11 S:2 P:1
LOOP e W:11 S:2 P:1
LOOP d W:11 S:2 P:1
LOOP W:11 S:2 P:1
LOOP b W:11 S:2 P:1 spc
COPY {20}
LOOP y W:12 S:2 P:1
LOOP W:12 S:2 P:1
LOOP " W:12 S:2 P:1 spc
COPY {20}
LOOP D W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP n W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP s W:13 S:2 P:1 quo
LOOP W:13 S:2 P:1 quo
LOOP R W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP t W:13 S:2 P:1 quo
LOOP c W:13 S:2 P:1 quo
LOOP h W:13 S:2 P:1 quo
LOOP i W:13 S:2 P:1 quo
LOOP e W:13 S:2 P:1 quo
LOOP " W:13 S:2 P:1 quo
LOOP . W:13 S:2 P:1
COPY {0A}
LOOP W:13 S:2 P:2 dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP W:13 S:2 P:2 spc dot
LOOP t W:13 S:2 P:2 spc dot
COPY {20}
COPY T
LOOP h W:14 S:3 P:2
LOOP e W:14 S:3 P:2
LOOP W:14 S:3 P:2
LOOP M W:14 S:3 P:2 spc
COPY {20}
COPY m
LOOP a W:15 S:3 P:2
LOOP i W:15 S:3 P:2
LOOP n W:15 S:3 P:2
LOOP W:15 S:3 P:2
LOOP f W:15 S:3 P:2 spc
COPY {20}
LOOP e W:16 S:3 P:2
LOOP A W:16 S:3 P:2
COPY a
LOOP t W:16 S:3 P:2
LOOP u W:16 S:3 P:2
LOOP r W:16 S:3 P:2
LOOP e W:16 S:3 P:2
LOOP s W:16 S:3 P:2
LOOP W:16 S:3 P:2
LOOP o W:16 S:3 P:2 spc
COPY {20}
LOOP f W:17 S:3 P:2
LOOP W:17 S:3 P:2
LOOP " W:17 S:3 P:2 spc
COPY {20}
LOOP C W:18 S:3 P:2 quo
LOOP " W:18 S:3 P:2 quo
LOOP W:18 S:3 P:2
LOOP l W:18 S:3 P:2 spc
COPY {20}
LOOP a W:19 S:3 P:2
LOOP n W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP u W:19 S:3 P:2
LOOP a W:19 S:3 P:2
LOOP g W:19 S:3 P:2
LOOP e W:19 S:3 P:2
LOOP W:19 S:3 P:2
LOOP i W:19 S:3 P:2 spc
COPY {20}
LOOP n W:20 S:3 P:2
LOOP c W:20 S:3 P:2
LOOP l W:20 S:3 P:2
LOOP u W:20 S:3 P:2
LOOP d W:20 S:3 P:2
LOOP e W:20 S:3 P:2
LOOP W:20 S:3 P:2
LOOP l W:20 S:3 P:2 spc
COPY {20}
LOOP o W:21 S:3 P:2
LOOP w W:21 S:3 P:2
LOOP - W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP v W:21 S:3 P:2
LOOP e W:21 S:3 P:2
LOOP l W:21 S:3 P:2
LOOP W:21 S:3 P:2
LOOP a W:21 S:3 P:2 spc
COPY {20}
LOOP c W:22 S:3 P:2
LOOP c W:22 S:3 P:2
LOOP e W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP s W:22 S:3 P:2
LOOP W:22 S:3 P:2
LOOP t W:22 S:3 P:2 spc
COPY {20}
LOOP o W:23 S:3 P:2
LOOP W:23 S:3 P:2
LOOP m W:23 S:3 P:2 spc
COPY {20}
LOOP e W:24 S:3 P:2
LOOP m W:24 S:3 P:2
LOOP o W:24 S:3 P:2
LOOP r W:24 S:3 P:2
LOOP y W:24 S:3 P:2
LOOP , W:24 S:3 P:2
LOOP W:24 S:3 P:2
LOOP s W:24 S:3 P:2 spc
COPY {20}
LOOP i W:25 S:3 P:2
LOOP m W:25 S:3 P:2
LOOP p W:25 S:3 P:2
LOOP l W:25 S:3 P:2
LOOP e W:25 S:3 P:2
LOOP W:25 S:3 P:2
LOOP s W:25 S:3 P:2 spc
COPY {20}
LOOP e W:26 S:3 P:2
LOOP t W:26 S:3 P:2
LOOP W:26 S:3 P:2
LOOP o W:26 S:3 P:2 spc
COPY {20}
LOOP f W:27 S:3 P:2
LOOP W:27 S:3 P:2
LOOP k W:27 S:3 P:2 spc
COPY {20}
LOOP e W:28 S:3 P:2
LOOP y W:28 S:3 P:2
LOOP w W:28 S:3 P:2
LOOP o W:28 S:3 P:2
LOOP r W:28 S:3 P:2
LOOP d W:28 S:3 P:2
LOOP s W:28 S:3 P:2
LOOP , W:28 S:3 P:2
LOOP W:28 S:3 P:2
LOOP a W:28 S:3 P:2 spc
COPY {20}
LOOP n W:29 S:3 P:2
LOOP d W:29 S:3 P:2
LOOP W:29 S:3 P:2
LOOP c W:29 S:3 P:2 spc
COPY {20}
LOOP l W:30 S:3 P:2
LOOP e W:30 S:3 P:2
LOOP a W:30 S:3 P:2
LOOP n W:30 S:3 P:2
LOOP W:30 S:3 P:2
LOOP s W:30 S:3 P:2 spc
COPY {20}
LOOP t W:31 S:3 P:2
LOOP y W:31 S:3 P:2
LOOP l W:31 S:3 P:2
LOOP e W:31 S:3 P:2
LOOP W:31 S:3 P:2
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP W:31 S:3 P:2 spc
LOOP . W:31 S:3 P:2 spc
LOOP {0A} W:31 S:3 P:2 spc dot
inp: 'the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style .{0A}'
buf: 'The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie"{0A} The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.{0A}'
TOTAL: length=214 sentences=3 paragraphs=2 words=31
Solution 2:[2]
Here is a possible solution.
Hopefully this will show that you don't need all of the repetition.
I've only tested it with the example given, so there may well be edge cases where it breaks. You might want to allocate the buffer dynamically rather than using a fixed size, and you need to check whether there are inputs that lead to expansion (i.e. output that is longer than the input).
Regarding capitals in the right places, there is no scope in the original for anything other than a single line, so no concept of paragraphs. Therefore I've gone for caps at the start of sentences.
NB: the OP didn't specify what the correct output is (the post is only titled "wrong character count"), so this is a best guess based on the requirements and some insight from the OP's code (which, as stated, wasn't producing the right result).
I don't think that the point here is to fix the OP's bugs, but to illustrate alternative ways of getting closer to, or achieving, a solution.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char original[] = "the LANGUAGE \"C\" is a procedural programming language .It was initially developed by \"Dennis Ritchie\".. the Main feAtures of \"C\" language include low-level access to memory, simple set of keywords, and clean style .";
/*
 * Clean the sample text in `original` and print it with its statistics.
 *
 * Rules applied in a single pass:
 *   - text between double quotes is copied verbatim
 *   - runs of spaces collapse to one; a space before ',' or '.' is dropped
 *   - a comma is always written as ", "
 *   - ".." becomes ".\n" and starts a new paragraph
 *   - the first character of a sentence/paragraph is uppercased, every other
 *     unquoted character is lowercased
 *
 * A word is a maximal run of letters (quoted or not), so "low-level" counts
 * as two words.
 *
 * Returns 0.
 */
int main() {
    /*
     * The output is built in its own buffer: it can be longer than the input
     * ("a,b" -> "A, b"), so rewriting the text in place would overwrite
     * characters that have not been read yet.  No input character produces
     * more than two output characters, and only the final '\n' and the
     * terminator are added on top, so this size always suffices.
     */
    char buffer[2 * sizeof original + 2];
    const char *src = original;
    char *dest = buffer;

    bool quoted = false;        /* inside a "..." span */
    bool sentence = false;      /* a sentence ended, next char starts one */
    int periods = 0;            /* dots seen but not yet written (0 or 1) */
    bool space = false;         /* a separating space is pending */
    bool paragraph = true;      /* next char starts a paragraph */
    bool comma = false;         /* a comma is pending */
    int letters = 0;            /* length of the current run of letters */
    int words = 0;
    int sentences = 0;
    int paragraphs = 0;

    for ( ; *src ; src++) {
        /* <ctype.h> functions need an unsigned char value */
        const unsigned char ch = (unsigned char) *src;

        /* word counting: a word ends at the first non-letter after it */
        if (isalpha(ch)) {
            letters++;
        } else if (letters) {
            words++;
            letters = 0;
        }

        if (quoted) {
            if (ch == '"') {
                quoted = false;
            }
            *dest++ = *src;
            continue;
        }

        switch (ch) {
        case '"':
            /* the opening quote itself is written like a normal character */
            quoted = true;
            break;
        case ',':
            comma = true;
            continue;
        case ' ':
            space = true;
            continue;
        case '.':
            if (++periods == 2) {
                *dest++ = '.';
                *dest++ = '\n';
                periods = 0;
                paragraph = true;
            } else {
                sentence = true;
            }
            continue;
        default:
            break;
        }

        /* write out whatever punctuation/spacing is pending, in order */
        if (comma) {
            *dest++ = ',';
            *dest++ = ' ';
            comma = space = false;
        }
        if (periods) {
            *dest++ = '.';
            periods = 0;
        }
        if (space) {
            /* no leading space at the start of a paragraph */
            if (!paragraph) {
                *dest++ = ' ';
            }
            space = false;
        }

        *dest++ = (char) (sentence || paragraph ? toupper(ch) : tolower(ch));

        if (sentence) {
            sentences++;
        }
        if (paragraph) {
            paragraphs++;
        }
        sentence = paragraph = false;
    }

    /* the text may end in the middle of a word */
    if (letters) {
        words++;
    }
    if (sentence) {
        sentences++;
    }
    if (paragraph) {
        paragraphs++;
    }
    if (periods) {
        *dest++ = '.';
    }
    *dest++ = '\n';
    *dest = '\0';

    printf("\nInput Chars=%d\n\n\"%s\"\n", (int)strlen(original), original);
    printf("\nOutput Chars=%d, Words=%d, Sentences=%d, Paragraphs=%d\n\n\"%s\"\n", (int)strlen(buffer), words, sentences, paragraphs, buffer);
    return 0;
}
This produces:
Input Chars=259
"the LANGUAGE "C" is a procedural programming language .It was initially developed by "Dennis Ritchie".. the Main feAtures of "C" language include low-level access to memory, simple set of keywords, and clean style ."
Output Chars=214, Words=34, Sentences=3, Paragraphs=2
"The language "C" is a procedural programming language. It was initially developed by "Dennis Ritchie".
The main features of "C" language include low-level access to memory, simple set of keywords, and clean style.
"
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | |
Solution 2 |