Skip to content

Commit ca8cf31

Browse files
committed
调整对接dl服务代码
1 parent af089f4 commit ca8cf31

15 files changed

+137
-57
lines changed

README.md

Whitespace-only changes.

docker-compose.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
version: 3
2+
3+
services:
4+
- file-server:
5+

file-server-dl/.dockerignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
venv/

file-server-dl/Dockerfile

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from file-server-dl:latest
2+
WORKDIR /app
3+
ADD . .
4+
# RUN https_proxy=http://192.168.163.65:10080 pip install -r requirements.txt
5+
CMD ["python", "main.py"]

file-server-dl/main.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
11
from magika import Magika
22
from flask import Flask, request
33
from service.file_understanding import FileUnderstanding
4+
import logging
5+
6+
logging.basicConfig(level=logging.DEBUG)
47

58
fileUnderstanding = FileUnderstanding()
9+
logging.info("File Understanding Service Started")
610

711
app = Flask(__name__)
812
# m = Magika()
913
@app.route('/api/v1/file/understanding', methods=['POST'])
def FileTypeInterfer():
    """Run file understanding on the path supplied in the JSON request body.

    Expects a JSON object with a ``path`` key. On success the result of
    ``fileUnderstanding.understand(path)`` is returned as a dict via its
    ``to_dict()`` method.

    Returns:
        The result dict on success; ``({"error": ...}, 400)`` when the body is
        not a JSON object containing ``path``; ``({"error": ...}, 500)`` when
        understanding raises.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body, so the caller gets a JSON error rather than an HTML error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'path' not in data:
        return {"error": "request body must be a JSON object with a 'path' field"}, 400

    path = data['path']
    logging.info("File Understanding Request: %s", path)
    try:
        result = fileUnderstanding.understand(path)
        return result.to_dict()
    except Exception as e:
        # Report the failure with a non-2xx status so clients can tell it
        # apart from a successful (possibly empty) result.
        logging.exception("Error: %s", e)
        return {"error": str(e)}, 500
1323

1424
if __name__ == "__main__":
1525
# start an http server here
16-
app.run(host='0.0.0.0', port='8081', debug=True)
26+
app.run(host='0.0.0.0', port='8081', debug=False, use_reloader=False)

file-server-dl/models/file.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@ def to_dict(self) -> dict:
1919
'label': self.label,
2020
'group': self.group,
2121
'description': self.description,
22-
'extension': self.ext
22+
'extension': self.ext.to_dict() if self.ext is not None else None
2323
}

file-server-dl/service/file_understanding.py

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pathlib import Path
33
from models.file import FileUnderstandingResult
44
from service.image_understanding import ImageUnderstanding
5+
import logging
56

67
class FileUnderstanding:
78
def __init__(self):
@@ -13,4 +14,5 @@ def understand(self, path: str) -> FileUnderstandingResult:
1314
file_understanding = FileUnderstandingResult(result.output.ct_label, result.output.group, result.output.description)
1415
if file_understanding.group == 'image':
1516
file_understanding.set_ext(self.image_understanding.understand(path))
17+
logging.info("File Understanding Result: %s", file_understanding)
1618
return file_understanding

file-server-dl/service/image_understanding.py

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def __init__(self):
1515
self.caption_model = None
1616
self.caption_vis_processors = None
1717
self.__init_clip_model__()
18+
self.__init_caption_model__()
1819

1920
def __init_clip_model__(self):
2021
self.clip_model, self.clip_preprocess = load_from_name("ViT-B-16", device=self.device, download_root="./")

file-server/Dockerfile

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
FROM golang:1.22 as builder
2+
WORKDIR /build
3+
ADD . .
4+
ENV HTTPS_PROXY=http://192.168.163.65:10080
5+
RUN go build -o app ./cmd/app
6+
7+
FROM ubuntu as app
8+
WORKDIR /app
9+
COPY --from=builder /build/app .
10+
EXPOSE 8080
11+
ENTRYPOINT ["./app"]

file-server/file/file.go

+46
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package file
22

33
import (
4+
"encoding/json"
45
"fileserver/utils"
6+
"strings"
57

8+
"github.com/ahmetb/go-linq/v3"
69
"gorm.io/gorm"
710
)
811

@@ -15,9 +18,12 @@ type File struct {
1518
Group string
1619
Description string
1720
Tags string
21+
Caption string
22+
Checksum string
1823
}
1924

2025
func NewFile(path string) File {
26+
// calc sha256 hash checksum
2127
return File{
2228
Path: path,
2329
Directory: utils.GetDirectory(path),
@@ -30,3 +36,43 @@ func (f *File) SetFileType(ttype, group, description string) {
3036
f.Group = group
3137
f.Description = description
3238
}
39+
40+
func (f *File) SetFileTypeFromUnderstanding(understanding understandingResult) {
41+
f.Type = understanding.Label
42+
f.Group = understanding.Group
43+
f.Description = understanding.Description
44+
f.setupFileExtionsionInfo(understanding)
45+
}
46+
47+
func (f *File) setupFileExtionsionInfo(understanding understandingResult) {
48+
if understanding.Extension == nil {
49+
return
50+
}
51+
if understanding.Group == "image" {
52+
bts, _ := json.Marshal(understanding.Extension)
53+
ext := imageUnderstandingExtension{}
54+
json.Unmarshal(bts, &ext)
55+
f.Caption = ext.Caption
56+
if len(ext.Labels) > 0 {
57+
var tags []string
58+
linq.From(ext.Labels).SelectT(func(lable imageUnderstandingExtensionLabel) string {
59+
return lable.Label
60+
}).ToSlice(&tags)
61+
f.Tags = strings.Join(tags, ",")
62+
}
63+
}
64+
}
65+
66+
func (f *File) CalcSha256() {
67+
f.Checksum = utils.Sha256(f.Path)
68+
}
69+
70+
// Typed views used to decode the "extension" payload of an image
// understanding result.
type (
	// imageUnderstandingExtension carries the caption and the labels
	// attached to an image.
	imageUnderstandingExtension struct {
		Caption string                             `json:"caption"`
		Labels  []imageUnderstandingExtensionLabel `json:"labels"`
	}

	// imageUnderstandingExtensionLabel is a single label together with its
	// confidence, which is decoded as a string.
	imageUnderstandingExtensionLabel struct {
		Label      string `json:"label"`
		Confidence string `json:"confidence"`
	}
)

file-server/file/file_scanner.go

+26-48
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"net/http"
1212
"regexp"
1313
"strings"
14+
"sync"
1415
)
1516

1617
type ScanOptions struct {
@@ -98,77 +99,54 @@ func StartFileScanner(ctx context.Context, c chan string, repo IFileRepository)
9899

99100
func singleFileHandler(ctx context.Context, file string, repo IFileRepository) {
100101
log.Default().Printf("handling file %s", file)
101-
// insert into database
102-
fileType, fileGroup, fileDescription, err := FileInfer(ctx, file)
103-
if err != nil {
104-
log.Default().Printf("error getting file type: %v", err)
105-
return
106-
}
107102
_file := NewFile(file)
108-
_file.SetFileType(fileType, fileGroup, fileDescription)
109-
// 如果为图片文件,对图像进行标注
110-
if fileGroup == "image" {
111-
var labels []string
112-
labels, err = ImageLabel(ctx, file)
103+
wg := sync.WaitGroup{}
104+
wg.Add(2)
105+
go func() {
106+
defer wg.Done()
107+
result, err := understanding(ctx, file)
113108
if err != nil {
114-
log.Default().Printf("error getting image label: %v", err)
109+
log.Default().Printf("error getting file type: %v", err)
115110
return
116111
}
117-
if len(labels) > 0 {
118-
_file.Tags = strings.Join(labels, ",")
119-
}
120-
}
121-
err = repo.CreateOrUpdateFile(ctx, _file)
112+
_file.SetFileTypeFromUnderstanding(result)
113+
}()
114+
go func() {
115+
defer wg.Done()
116+
_file.Checksum = utils.Sha256(file)
117+
}()
118+
// insert into database
119+
wg.Wait()
120+
err := repo.CreateOrUpdateFile(ctx, _file)
122121
if err != nil {
123122
log.Default().Printf("error inserting file %s: %v", file, err)
124123
}
125124
}
126125

127-
// FileInfer is a function to determine the file type
128-
func FileInfer(ctx context.Context, file string) (t string, g string, d string, err error) {
126+
// understanding is a function to determine the file type, and tring to label and caption this image
127+
func understanding(ctx context.Context, file string) (r understandingResult, err error) {
129128
data := bytes.NewBuffer([]byte(fmt.Sprintf(`{"path": "%s"}`, file)))
130-
request, _ := http.NewRequest(http.MethodPost, "http://localhost:8081/api/v1/file/interfer", data)
129+
request, _ := http.NewRequest(http.MethodPost, "http://192.168.163.65:8081/api/v1/file/understanding", data)
131130
request.Header.Set("Content-Type", "application/json")
132131
resp, err := http.DefaultClient.Do(request)
133132
if err != nil {
134133
log.Default().Printf("error getting file type: %v", err)
135134
return
136135
}
137136
defer resp.Body.Close()
138-
type response struct {
139-
Type string `json:"type"`
140-
Group string `json:"group"`
141-
Description string `json:"description"`
142-
}
137+
143138
bts, err := io.ReadAll(resp.Body)
144139
if err != nil {
145140
log.Default().Printf("error reading response body: %v", err)
146141
return
147142
}
148-
var r response
149143
json.Unmarshal(bts, &r)
150-
return r.Type, r.Group, r.Description, nil
144+
return
151145
}
152146

153-
func ImageLabel(ctx context.Context, file string) (labels []string, err error) {
154-
data := bytes.NewBuffer([]byte(fmt.Sprintf(`{"path": "%s"}`, file)))
155-
request, _ := http.NewRequest(http.MethodPost, "http://localhost:8081/api/v1/file/image_label", data)
156-
request.Header.Set("Content-Type", "application/json")
157-
resp, err := http.DefaultClient.Do(request)
158-
if err != nil {
159-
log.Default().Printf("error getting file type: %v", err)
160-
return
161-
}
162-
defer resp.Body.Close()
163-
type response struct {
164-
Label string `json:"label"`
165-
Confidence string `json:"confidence"`
166-
}
167-
var r []response
168-
bts, err := io.ReadAll(resp.Body)
169-
json.Unmarshal(bts, &r)
170-
for _, l := range r {
171-
labels = append(labels, l.Label)
172-
}
173-
return
147+
// understandingResult is the JSON reply of the file-understanding endpoint.
type understandingResult struct {
	// Label is the detected file type label.
	Label string `json:"label"`
	// Group is the group the file type belongs to, e.g. "image".
	Group string `json:"group"`
	// Description is the description text supplied for the file type.
	Description string `json:"description"`
	// Extension holds optional group-specific data; it is kept untyped and
	// is nil when the reply carries no extension.
	Extension any `json:"extension"`
}

file-server/file/repository.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ func (r *FileRepository) ListFileByDirectory(ctx context.Context, directory stri
4040

4141
func (r *FileRepository) CreateOrUpdateFile(ctx context.Context, file File) (err error) {
4242
return r.db.Exec(`insert into files
43-
(path, directory, extension, type, "group", description, tags ) values
44-
(?,?,?,?,?,?,?) on conflict(path) do update set directory = ?, extension = ?, type = ?, "group"=?, description=? , tags = ?;`,
45-
file.Path, file.Directory, file.Extension, file.Type, file.Group, file.Description, file.Tags,
46-
file.Directory, file.Extension, file.Type, file.Group, file.Description, file.Tags).Error
43+
(path, directory, extension, type, "group", description, tags, caption, checksum ) values
44+
(?,?,?,?,?,?,?,?,?) on conflict(path) do update set directory = ?, extension = ?, type = ?, "group"=?, description=? , tags = ?, caption = ?, checksum = ?;`,
45+
file.Path, file.Directory, file.Extension, file.Type, file.Group, file.Description, file.Tags, file.Caption, file.Checksum,
46+
file.Directory, file.Extension, file.Type, file.Group, file.Description, file.Tags, file.Caption, file.Checksum).Error
4747
}

file-server/go.mod

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@ go 1.22.4
55
require (
66
go.mongodb.org/mongo-driver v1.15.1
77
gopkg.in/yaml.v2 v2.4.0
8+
gorm.io/driver/sqlite v1.5.6
9+
gorm.io/gorm v1.25.10
810
)
911

1012
require (
13+
github.com/ahmetb/go-linq/v3 v3.2.0
1114
github.com/golang/snappy v0.0.1 // indirect
1215
github.com/jinzhu/inflection v1.0.0 // indirect
1316
github.com/jinzhu/now v1.1.5 // indirect
@@ -21,6 +24,4 @@ require (
2124
golang.org/x/crypto v0.17.0 // indirect
2225
golang.org/x/sync v0.1.0 // indirect
2326
golang.org/x/text v0.14.0 // indirect
24-
gorm.io/driver/sqlite v1.5.6 // indirect
25-
gorm.io/gorm v1.25.10 // indirect
2627
)

file-server/go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
github.com/ahmetb/go-linq/v3 v3.2.0 h1:BEuMfp+b59io8g5wYzNoFe9pWPalRklhlhbiU3hYZDE=
2+
github.com/ahmetb/go-linq/v3 v3.2.0/go.mod h1:haQ3JfOeWK8HpVxMtHHEMPVgBKiYyQ+f1/kLZh/cj9U=
13
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
24
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
35
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=

file-server/utils/file.go

+18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package utils
22

33
import (
4+
"crypto/sha256"
5+
"encoding/hex"
6+
"io"
47
"log"
58
"os"
69
"path"
@@ -63,3 +66,18 @@ func GetExtension(f string) string {
6366
}
6467
return ""
6568
}
69+
70+
// Sha256 returns the lowercase hex-encoded SHA-256 digest of the file at path.
//
// The file is streamed through the hash rather than read into memory at once.
// If the file cannot be opened or read, the error is logged and an empty
// string is returned; a single unreadable file must not terminate the whole
// process, which is what log.Fatalf would do.
func Sha256(path string) string {
	file, err := os.Open(path)
	if err != nil {
		log.Default().Printf("error opening file %s: %v", path, err)
		return ""
	}
	defer file.Close()

	hash := sha256.New()
	if _, err := io.Copy(hash, file); err != nil {
		log.Default().Printf("error calculating SHA256 for file %s: %v", path, err)
		return ""
	}

	return hex.EncodeToString(hash.Sum(nil))
}

0 commit comments

Comments
 (0)