Skip to content

Commit

Permalink
Spam storage and web ui for detected spam (#51)
Browse files Browse the repository at this point in the history
* Add spam logging to database

The main functionality of the spam logger has been expanded to not only log spam messages to a file, but also store them in a database. This facilitates efficient tracking and analysis of spam data. Furthermore, test cases have been added to ensure this new functionality works as expected.

* Add Detected Spam page to web UI

A Detected Spam page has been added to the web UI, where users can see a list of detected spam messages. Changes have been made to attach data DB to the server activation function and the timestamp is now localized. Fixed a bug in storage/detected_spam.go where the 'Checks' field was incorrectly attached to a non-existent 'entry' variable.

* Add DetectedSpamReader mock and implement detected spam HTML handler tests
  • Loading branch information
umputun authored Jan 19, 2024
1 parent b20079f commit 1985b42
Show file tree
Hide file tree
Showing 11 changed files with 512 additions and 24 deletions.
4 changes: 3 additions & 1 deletion app/events/listener_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ func TestTelegramListener_DoWithBotBan(t *testing.T) {
b := &mocks.BotMock{OnMessageFunc: func(msg bot.Message) bot.Response {
t.Logf("on-message: %+v", msg)
if msg.Text == "text 123" && msg.From.Username == "user" {
return bot.Response{Send: true, Text: "bot's answer", BanInterval: 2 * time.Minute, User: bot.User{Username: "user", ID: 1}}
return bot.Response{Send: true, Text: "bot's answer", BanInterval: 2 * time.Minute,
User: bot.User{Username: "user", ID: 1}, CheckResults: []spamcheck.Response{
{Name: "Check1", Spam: true, Details: "Details 1"}}}
}
if msg.From.Username == "ChannelBot" {
return bot.Response{Send: true, Text: "bot's answer for channel", BanInterval: 2 * time.Minute, User: bot.User{Username: "user", ID: 1}, ChannelID: msg.SenderChat.ID}
Expand Down
60 changes: 47 additions & 13 deletions app/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/fatih/color"
"github.com/go-pkgz/lgr"
tbapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
"github.com/jmoiron/sqlx"
"github.com/sashabaranov/go-openai"
"github.com/umputun/go-flags"
"gopkg.in/natefinch/lumberjack.v2"
Expand Down Expand Up @@ -213,7 +214,7 @@ func execute(ctx context.Context, opts options) error {
// activate web server if enabled
if opts.Server.Enabled {
// server starts in background goroutine
if srvErr := activateServer(ctx, opts, spamBot, locator); srvErr != nil {
if srvErr := activateServer(ctx, opts, spamBot, locator, dataDB); srvErr != nil {
return fmt.Errorf("can't activate web server, %w", srvErr)
}
// if no telegram token and group set, just run the server
Expand All @@ -231,13 +232,19 @@ func execute(ctx context.Context, opts options) error {
}
tbAPI.Debug = opts.TGDbg

// make spam logger
// make spam logger writer
loggerWr, err := makeSpamLogWriter(opts)
if err != nil {
return fmt.Errorf("can't make spam log writer, %w", err)
}
defer loggerWr.Close()

// make spam logger
spamLogger, err := makeSpamLogger(loggerWr, dataDB)
if err != nil {
return fmt.Errorf("can't make spam logger, %w", err)
}

// make telegram listener
tgListener := events.TelegramListener{
TbAPI: tbAPI,
Expand All @@ -247,7 +254,7 @@ func execute(ctx context.Context, opts options) error {
Bot: spamBot,
StartupMsg: opts.Message.Startup,
NoSpamReply: opts.NoSpamReply,
SpamLogger: makeSpamLogger(loggerWr),
SpamLogger: spamLogger,
AdminGroup: opts.AdminGroup,
TestingIDs: opts.TestingIDs,
Locator: locator,
Expand Down Expand Up @@ -305,7 +312,7 @@ func checkVolumeMount(opts options) (ok bool) {
return false
}

func activateServer(ctx context.Context, opts options, sf *bot.SpamFilter, loc *storage.Locator) (err error) {
func activateServer(ctx context.Context, opts options, sf *bot.SpamFilter, loc *storage.Locator, dataDB *sqlx.DB) (err error) {
authPassswd := opts.Server.AuthPasswd
if opts.Server.AuthPasswd == "auto" {
authPassswd, err = webapi.GenerateRandomPassword(20)
Expand All @@ -315,14 +322,21 @@ func activateServer(ctx context.Context, opts options, sf *bot.SpamFilter, loc *
log.Printf("[WARN] generated basic auth password for user tg-spam: %q", authPassswd)
}

// make store and load approved users
detectedSpamStore, auErr := storage.NewDetectedSpam(dataDB)
if auErr != nil {
return fmt.Errorf("can't make approved users store, %w", auErr)
}

srv := webapi.Server{Config: webapi.Config{
ListenAddr: opts.Server.ListenAddr,
Detector: sf.Detector,
SpamFilter: sf,
Locator: loc,
AuthPasswd: authPassswd,
Version: revision,
Dbg: opts.Dbg,
ListenAddr: opts.Server.ListenAddr,
Detector: sf.Detector,
SpamFilter: sf,
Locator: loc,
DetectedSpamReader: detectedSpamStore,
AuthPasswd: authPassswd,
Version: revision,
Dbg: opts.Dbg,
}}

go func() {
Expand Down Expand Up @@ -432,8 +446,15 @@ func (n nopWriteCloser) Close() error { return nil }

// makeSpamLogger creates spam logger to keep reports about spam messages
// it writes json lines to the provided writer
func makeSpamLogger(wr io.Writer) events.SpamLogger {
return events.SpamLoggerFunc(func(msg *bot.Message, response *bot.Response) {
func makeSpamLogger(wr io.Writer, dataDB *sqlx.DB) (events.SpamLogger, error) {
// make store and load approved users
detectedSpamStore, auErr := storage.NewDetectedSpam(dataDB)
if auErr != nil {
return nil, fmt.Errorf("can't make approved users store, %w", auErr)
}

logWr := events.SpamLoggerFunc(func(msg *bot.Message, response *bot.Response) {
// write to log file
text := strings.ReplaceAll(msg.Text, "\n", " ")
text = strings.TrimSpace(text)
log.Printf("[DEBUG] spam detected from %v, text: %s", msg.From, text)
Expand All @@ -458,7 +479,20 @@ func makeSpamLogger(wr io.Writer) events.SpamLogger {
if _, err := wr.Write(append(line, '\n')); err != nil {
log.Printf("[WARN] can't write to log, %v", err)
}

// write to db store
rec := storage.DetectedSpamInfo{
Text: text,
UserID: msg.From.ID,
UserName: msg.From.Username,
Timestamp: time.Now().In(time.Local),
}
if err := detectedSpamStore.Write(rec, response.CheckResults); err != nil {
log.Printf("[WARN] can't write to db, %v", err)
}
})

return logWr, nil
}

// makeSpamLogWriter creates spam log writer to keep reports about spam messages
Expand Down
29 changes: 26 additions & 3 deletions app/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,26 @@ import (
"testing"
"time"

"github.com/jmoiron/sqlx"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/umputun/tg-spam/app/bot"
"github.com/umputun/tg-spam/app/storage"
"github.com/umputun/tg-spam/lib/spamcheck"
)

func TestMakeSpamLogger(t *testing.T) {
file, err := os.CreateTemp(os.TempDir(), "log")
require.NoError(t, err)
defer os.Remove(file.Name())

logger := makeSpamLogger(file)
db, err := sqlx.Open("sqlite", ":memory:")
require.NoError(t, err)
defer db.Close()

logger, err := makeSpamLogger(file, db)
require.NoError(t, err)

msg := &bot.Message{
From: bot.User{
Expand All @@ -36,14 +44,18 @@ func TestMakeSpamLogger(t *testing.T) {

response := &bot.Response{
Text: "spam detected",
CheckResults: []spamcheck.Response{
{Name: "Check1", Spam: true, Details: "Details 1"},
{Name: "Check2", Spam: false, Details: "Details 2"},
},
}

logger.Save(msg, response)
file.Close()

// check that the message is saved to the log file
file, err = os.Open(file.Name())
require.NoError(t, err)

scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
Expand All @@ -58,8 +70,19 @@ func TestMakeSpamLogger(t *testing.T) {
assert.Equal(t, float64(123), logEntry["user_id"]) // json.Unmarshal converts numbers to float64
assert.Equal(t, "Test message blah blah", logEntry["text"])
}

assert.NoError(t, scanner.Err())

// check that the message is saved to the database
savedMsgs := []storage.DetectedSpamInfo{}
err = db.Select(&savedMsgs, "SELECT text, user_id, user_name, timestamp, checks FROM detected_spam")
require.NoError(t, err)
assert.Equal(t, 1, len(savedMsgs))
assert.Equal(t, "Test message blah blah", savedMsgs[0].Text)
assert.Equal(t, "testuser", savedMsgs[0].UserName)
assert.Equal(t, int64(123), savedMsgs[0].UserID)
assert.Equal(t, `[{"name":"Check1","spam":true,"details":"Details 1"},{"name":"Check2","spam":false,"details":"Details 2"}]`,
savedMsgs[0].ChecksJSON)

}

func TestMakeSpamLogWriter(t *testing.T) {
Expand Down
78 changes: 78 additions & 0 deletions app/storage/detected_spam.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package storage

import (
"encoding/json"
"fmt"
"log"
"time"

"github.com/jmoiron/sqlx"

"github.com/umputun/tg-spam/lib/spamcheck"
)

// DetectedSpam is a storage for detected spam entries
type DetectedSpam struct {
db *sqlx.DB
}

// DetectedSpamInfo represents information about a detected spam entry.
type DetectedSpamInfo struct {
Text string `db:"text"`
UserID int64 `db:"user_id"`
UserName string `db:"user_name"`
Timestamp time.Time `db:"timestamp"`
ChecksJSON string `db:"checks"` // Store as JSON
Checks []spamcheck.Response `db:"-"` // Don't store in DB
}

// NewDetectedSpam creates a new DetectedSpam storage
func NewDetectedSpam(db *sqlx.DB) (*DetectedSpam, error) {
_, err := db.Exec(`CREATE TABLE IF NOT EXISTS detected_spam (
id INTEGER PRIMARY KEY AUTOINCREMENT,
text TEXT,
user_id INTEGER,
user_name TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
checks TEXT
)`)
if err != nil {
return nil, fmt.Errorf("failed to create detected_spam table: %w", err)
}
return &DetectedSpam{db: db}, nil
}

// Write adds a new detected spam entry
func (ds *DetectedSpam) Write(entry DetectedSpamInfo, checks []spamcheck.Response) error {
checksJSON, err := json.Marshal(checks)
if err != nil {
return fmt.Errorf("failed to marshal checks: %w", err)
}

query := `INSERT INTO detected_spam (text, user_id, user_name, timestamp, checks) VALUES (?, ?, ?, ?, ?)`
if _, err := ds.db.Exec(query, entry.Text, entry.UserID, entry.UserName, entry.Timestamp, checksJSON); err != nil {
return fmt.Errorf("failed to insert detected spam entry: %w", err)
}

log.Printf("[INFO] detected spam entry added for user_id:%d, name:%s", entry.UserID, entry.UserName)
return nil
}

// Read returns all detected spam entries
func (ds *DetectedSpam) Read() ([]DetectedSpamInfo, error) {
var entries []DetectedSpamInfo
err := ds.db.Select(&entries, "SELECT text, user_id, user_name, timestamp, checks FROM detected_spam ORDER BY timestamp DESC")
if err != nil {
return nil, fmt.Errorf("failed to get detected spam entries: %w", err)
}

for i, entry := range entries {
var checks []spamcheck.Response
if err := json.Unmarshal([]byte(entry.ChecksJSON), &checks); err != nil {
return nil, fmt.Errorf("failed to unmarshal checks for entry %d: %w", i, err)
}
entries[i].Checks = checks
entries[i].Timestamp = entry.Timestamp.Local()
}
return entries, nil
}
102 changes: 102 additions & 0 deletions app/storage/detected_spam_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package storage

import (
"encoding/json"
"testing"
"time"

"github.com/jmoiron/sqlx"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/umputun/tg-spam/lib/spamcheck"
)

func TestDetectedSpam_NewDetectedSpam(t *testing.T) {
db, err := sqlx.Open("sqlite", ":memory:")
require.NoError(t, err)
defer db.Close()

_, err = NewDetectedSpam(db)
require.NoError(t, err)

var exists int
err = db.Get(&exists, "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='detected_spam'")
require.NoError(t, err)
assert.Equal(t, 1, exists)
}

func TestDetectedSpam_Write(t *testing.T) {
db, err := sqlx.Open("sqlite", ":memory:")
require.NoError(t, err)
defer db.Close()

ds, err := NewDetectedSpam(db)
require.NoError(t, err)

spamEntry := DetectedSpamInfo{
Text: "spam message",
UserID: 1,
UserName: "Spammer",
Timestamp: time.Now(),
}

checks := []spamcheck.Response{
{
Name: "Check1",
Spam: true,
Details: "Details 1",
},
}

err = ds.Write(spamEntry, checks)
require.NoError(t, err)

var count int
err = db.Get(&count, "SELECT COUNT(*) FROM detected_spam")
require.NoError(t, err)
assert.Equal(t, 1, count)
}

func TestDetectedSpam_Read(t *testing.T) {
db, err := sqlx.Open("sqlite", ":memory:")
require.NoError(t, err)
defer db.Close()

ds, err := NewDetectedSpam(db)
require.NoError(t, err)

spamEntry := DetectedSpamInfo{
Text: "spam message",
UserID: 1,
UserName: "Spammer",
Timestamp: time.Now(),
}

checks := []spamcheck.Response{
{
Name: "Check1",
Spam: true,
Details: "Details 1",
},
}

checksJSON, err := json.Marshal(checks)
require.NoError(t, err)
_, err = db.Exec("INSERT INTO detected_spam (text, user_id, user_name, timestamp, checks) VALUES (?, ?, ?, ?, ?)", spamEntry.Text, spamEntry.UserID, spamEntry.UserName, spamEntry.Timestamp, checksJSON)
require.NoError(t, err)

entries, err := ds.Read()
require.NoError(t, err)
require.Len(t, entries, 1)

assert.Equal(t, spamEntry.Text, entries[0].Text)
assert.Equal(t, spamEntry.UserID, entries[0].UserID)
assert.Equal(t, spamEntry.UserName, entries[0].UserName)

var retrievedChecks []spamcheck.Response
err = json.Unmarshal([]byte(entries[0].ChecksJSON), &retrievedChecks)
require.NoError(t, err)
assert.Equal(t, checks, retrievedChecks)
t.Logf("retrieved checks: %+v", retrievedChecks)
}
3 changes: 3 additions & 0 deletions app/webapi/assets/components/navbar.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
<li class="nav-item">
<a class="nav-link" href="/manage_users">Manage Users</a>
</li>
<li class="nav-item">
<a class="nav-link" href="/detected_spam">Detected Spam</a>
</li>
</ul>
</div>
</div>
Expand Down
Loading

0 comments on commit 1985b42

Please sign in to comment.