Skip to content

Commit

Permalink
Merge pull request #4314 from stweil/optimize
Browse files (browse the repository at this point in the history)
Add C++ stream for log messages and use it in two debug messages
  • Loading branch information
stweil authored Sep 4, 2024
2 parents a63e7ec + 37d1c65 commit 4f43536
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 15 deletions.
16 changes: 9 additions & 7 deletions src/ccmain/control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#endif
#include "sorthelper.h"
#include "tesseractclass.h"
#include "tesserrstream.h" // for tesserr
#include "tessvars.h"
#include "werdit.h"

Expand Down Expand Up @@ -1313,9 +1314,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
PointerVector<WERD_RES> best_words;
// Points to the best result. May be word or in lang_words.
const WERD_RES *word = word_data->word;
clock_t start_t = 0;
if (tessedit_timing_debug) {
start_t = clock();
clock_t total_time = 0;
const bool timing_debug = tessedit_timing_debug;
if (timing_debug) {
total_time = clock();
}
const bool debug = classify_debug_level > 0 || multilang_debug_level > 0;
if (debug) {
Expand Down Expand Up @@ -1368,10 +1370,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
} else {
tprintf("no best words!!\n");
}
if (tessedit_timing_debug) {
clock_t ocr_t = clock();
tprintf("%s (ocr took %.2f sec)\n", word_data->word->best_choice->unichar_string().c_str(),
static_cast<double>(ocr_t - start_t) / CLOCKS_PER_SEC);
if (timing_debug) {
total_time = clock() - total_time;
tesserr << word_data->word->best_choice->unichar_string()
<< " (ocr took " << 1000 * total_time / CLOCKS_PER_SEC << " ms)\n";
}
}

Expand Down
68 changes: 68 additions & 0 deletions src/ccutil/tesserrstream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// File: tesserrstream.h
// Description: C++ stream which enhances tprintf
// Author: Stefan Weil
//
// (C) Copyright 2024
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef TESSERACT_CCUTIL_TESSERRSTREAM_H
#define TESSERACT_CCUTIL_TESSERRSTREAM_H

#include "tprintf.h"
#include <tesseract/export.h> // for TESS_API

#include <ostream> // for std::ostream

namespace tesseract {

class TessStreamBuf : public std::streambuf {
public:
TessStreamBuf() = default;

protected:
virtual int_type overflow(int_type c) override {
if (c != EOF) {
if (debugfp == nullptr) {
debugfp = get_debugfp();
}
if (fputc(c, debugfp) == EOF) {
return EOF;
}
}
return c;
}

virtual std::streamsize xsputn(const char* s, std::streamsize n) override {
if (debugfp == nullptr) {
debugfp = get_debugfp();
}
return fwrite(s, 1, n, debugfp);
}

private:
FILE *debugfp = nullptr;
};

// Output stream for log messages. Everything inserted into the stream is
// passed on to the embedded TessStreamBuf.
class TessErrStream : public std::ostream {
public:
  // The std::ostream base class is constructed before the buffer member,
  // so the stream starts without a buffer and gets the real one attached
  // in the constructor body, when the member exists.
  TessErrStream() : std::ostream(nullptr) {
    rdbuf(&buf);
  }

private:
  // Buffer which does the real output.
  TessStreamBuf buf;
};

// Global stream object for log messages, usable with the << operators
// of std::ostream (for example tesserr << "text" << value << '\n').
extern TESS_API TessErrStream tesserr;

} // namespace tesseract

#endif // TESSERACT_CCUTIL_TESSERRSTREAM_H
12 changes: 8 additions & 4 deletions src/ccutil/tprintf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# include "config_auto.h"
#endif

#include "tesserrstream.h"
#include "tprintf.h"

#include "params.h"
Expand All @@ -36,7 +37,7 @@ INT_VAR(log_level, INT_MAX, "Logging level");
static STRING_VAR(debug_file, "", "File to send tprintf output to");

// File for debug output.
static FILE *debugfp;
FILE *debugfp;

// Set output for log messages.
// The output is written to stderr if debug_file is empty.
Expand All @@ -49,7 +50,7 @@ static FILE *debugfp;
// tprintf("write to /tmp/log\n");
// debug_file = "";
// tprintf("write to stderr\n");
static void set_debugfp() {
FILE *get_debugfp() {
if (debug_file.empty()) {
// Write to stderr.
if (debugfp != stderr && debugfp != nullptr) {
Expand All @@ -66,15 +67,18 @@ static void set_debugfp() {
#endif
debugfp = fopen(debug_file.c_str(), "wb");
}
return debugfp;
}

// Trace printf.
// Formats the message like printf and writes it to the file for debug
// output which is returned by get_debugfp().
void tprintf(const char *format, ...) {
  FILE *f = get_debugfp();
  if (f == nullptr) {
    // get_debugfp() returns the result of fopen for a named debug file,
    // which is null if that file cannot be opened. Write the message
    // to stderr instead of passing a null FILE pointer to vfprintf.
    f = stderr;
  }
  va_list args;           // variable args
  va_start(args, format); // variable list
  // The argument list is consumed exactly once.
  vfprintf(f, format, args);
  va_end(args);
}

// Definition of the global log stream which is declared in tesserrstream.h.
TessErrStream tesserr;

} // namespace tesseract
3 changes: 3 additions & 0 deletions src/ccutil/tprintf.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ extern TESS_API void tprintf( // Trace printf
const char *format, ...) // Message
__attribute__((format(printf, 1, 2)));

// Get file for debug output.
// This is the file which tprintf writes to. The returned pointer should
// not be kept by callers, because the output can be redirected later.
// NOTE(review): unlike tprintf this declaration has no TESS_API, although
// the inline code in tesserrstream.h calls it. Confirm that code built
// outside of the library (e.g. the training tools) still links.
FILE *get_debugfp();

} // namespace tesseract

#undef __attribute__
Expand Down
12 changes: 8 additions & 4 deletions src/training/common/errorcounter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "sampleiterator.h"
#include "shapeclassifier.h"
#include "shapetable.h"
#include "tesserrstream.h"
#include "trainingsample.h"
#include "trainingsampleset.h"
#include "unicity_table.h"
Expand Down Expand Up @@ -50,7 +51,10 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
ErrorCounter counter(classifier->GetUnicharset(), fontsize);
std::vector<UnicharRating> results;

clock_t start = clock();
clock_t total_time = 0;
if (report_level > 1) {
total_time = clock();
}
unsigned total_samples = 0;
double unscaled_error = 0.0;
// Set a number of samples on which to run the classify debug mode.
Expand Down Expand Up @@ -85,7 +89,6 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
}
++total_samples;
}
const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
// Create the appropriate error report.
unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
unichar_error, fonts_report);
Expand All @@ -94,8 +97,9 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
}
if (report_level > 1 && total_samples > 0) {
// It is useful to know the time in microseconds/char.
tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time,
1000000.0 * total_time / total_samples);
total_time = 1000 * (clock() - total_time) / CLOCKS_PER_SEC;
tesserr << "Errors computed in " << total_time << " ms at "
<< 1000 * total_time / total_samples << " μs/char\n";
}
return unscaled_error;
}
Expand Down

0 comments on commit 4f43536

Please sign in to comment.