-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathserver.go
236 lines (192 loc) · 6.02 KB
/
server.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
package firecracker
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"sync"
"github.com/lithammer/shortuuid/v4"
"github.com/loopholelabs/goroutine-manager/pkg/manager"
"golang.org/x/sys/unix"
"k8s.io/utils/inotify"
)
// Sentinel errors used by StartFirecrackerServer and the Wait/Close callbacks it installs.
var (
	// errSignalKilled mirrors the message of the error returned by `cmd.Wait()` when the
	// process was killed by a signal; it is only ever compared by message.
	errSignalKilled = errors.New("signal: killed")

	// ErrCouldNotCreateVMPathDirectory is raised when the VM path directory can't be created.
	ErrCouldNotCreateVMPathDirectory = errors.New("could not create VM path directory")

	// ErrCouldNotCreateInotifyWatcher is raised when the inotify watcher can't be created.
	ErrCouldNotCreateInotifyWatcher = errors.New("could not create inotify watcher")

	// ErrCouldNotAddInotifyWatch is raised when the VM path can't be added to the watcher.
	ErrCouldNotAddInotifyWatch = errors.New("could not add inotify watch")

	// ErrCouldNotReadNUMACPUList is raised when the NUMA node's CPU list can't be read.
	ErrCouldNotReadNUMACPUList = errors.New("could not read NUMA CPU list")

	// ErrCouldNotStartFirecrackerServer is raised when the jailer command fails to start.
	ErrCouldNotStartFirecrackerServer = errors.New("could not start firecracker server")

	// ErrNoSocketCreated is raised when watching ends before the API socket appeared.
	ErrNoSocketCreated = errors.New("no socket created")

	// ErrFirecrackerExited wraps the error the process exited with.
	ErrFirecrackerExited = errors.New("firecracker exited")

	// ErrCouldNotWaitForFirecracker is raised when waiting for the process returns an error.
	ErrCouldNotWaitForFirecracker = errors.New("could not wait for firecracker")

	// ErrCouldNotCloseWatcher is raised when closing the inotify watcher fails.
	ErrCouldNotCloseWatcher = errors.New("could not close watcher")

	// ErrCouldNotCloseServer is raised when closing the server after context cancellation fails.
	ErrCouldNotCloseServer = errors.New("could not close server")
)
// FirecrackerSocketName is the file name of the Firecracker API socket; it is passed to
// Firecracker as `--api-sock` and awaited inside the VM path directory.
const FirecrackerSocketName = "firecracker.sock"
// FirecrackerServer describes a Firecracker process started by StartFirecrackerServer.
type FirecrackerServer struct {
	// VMPath is the per-VM directory in which the API socket is expected to appear.
	VMPath string

	// VMPid is the PID of the started process.
	VMPid int

	// Wait blocks until the process has exited and returns its exit error, if any.
	Wait func() error

	// Close kills the process (if it is still running) and then waits for it.
	Close func() error
}
// StartFirecrackerServer starts Firecracker through the jailer and blocks until the
// Firecracker API socket has been created inside the VM path directory.
//
// Parameters:
//   - ctx: cancelling it shuts the started process down, even after this function returned.
//   - firecrackerBin: passed to the jailer as `--exec-file`.
//   - jailerBin: the binary that is actually executed.
//   - chrootBaseDir: passed as `--chroot-base-dir`; the VM path is
//     `<chrootBaseDir>/firecracker/<id>/root`, where `<id>` is a freshly generated short UUID.
//   - uid, gid: passed as `--uid` and `--gid`.
//   - netns: name of the network namespace; passed as `--netns /var/run/netns/<netns>`.
//   - numaNode: used for `cpuset.mems`, and its CPU list (read from
//     `/sys/devices/system/node/node<numaNode>/cpulist`) is used for `cpuset.cpus`.
//   - cgroupVersion: passed as `--cgroup-version`.
//   - enableOutput: forward the child's stdout and stderr to this process' stdout and stderr.
//   - enableInput: forward this process' stdin to the child; if false, the child is put into
//     its own process group instead.
//
// The returned server always has non-nil Wait and Close functions. Failures are raised via
// panic; since the goroutine manager is handed `&errs`, they are presumably collected into
// the returned error - NOTE(review): confirm against the goroutine-manager package.
func StartFirecrackerServer(
	ctx context.Context,

	firecrackerBin string,
	jailerBin string,

	chrootBaseDir string,

	uid int,
	gid int,

	netns string,
	numaNode int,
	cgroupVersion int,

	enableOutput bool,
	enableInput bool,
) (server *FirecrackerServer, errs error) {
	// Start with no-op callbacks so that the returned server is always safe to use
	server = &FirecrackerServer{
		Wait: func() error {
			return nil
		},
		Close: func() error {
			return nil
		},
	}

	goroutineManager := manager.NewGoroutineManager(
		ctx,
		&errs,
		manager.GoroutineManagerHooks{},
	)
	defer goroutineManager.Wait()
	defer goroutineManager.StopAllGoroutines()
	defer goroutineManager.CreateBackgroundPanicCollector()()

	id := shortuuid.New()

	server.VMPath = filepath.Join(chrootBaseDir, "firecracker", id, "root")
	if err := os.MkdirAll(server.VMPath, os.ModePerm); err != nil {
		panic(errors.Join(ErrCouldNotCreateVMPathDirectory, err))
	}

	// The watch has to be in place before the process is started so that the socket creation event can't be missed
	watcher, err := inotify.NewWatcher()
	if err != nil {
		panic(errors.Join(ErrCouldNotCreateInotifyWatcher, err))
	}
	defer watcher.Close()

	if err := watcher.AddWatch(server.VMPath, inotify.InCreate); err != nil {
		panic(errors.Join(ErrCouldNotAddInotifyWatch, err))
	}

	cpus, err := os.ReadFile(filepath.Join("/sys", "devices", "system", "node", fmt.Sprintf("node%v", numaNode), "cpulist"))
	if err != nil {
		panic(errors.Join(ErrCouldNotReadNUMACPUList, err))
	}

	cmd := exec.CommandContext(
		ctx, // We use ctx, not goroutineManager.Context() here since this resource outlives the function call
		jailerBin,

		"--chroot-base-dir",
		chrootBaseDir,

		"--uid",
		fmt.Sprintf("%v", uid),

		"--gid",
		fmt.Sprintf("%v", gid),

		"--netns",
		filepath.Join("/var", "run", "netns", netns),

		"--cgroup-version",
		fmt.Sprintf("%v", cgroupVersion),

		"--cgroup",
		fmt.Sprintf("cpuset.mems=%v", numaNode),

		"--cgroup",
		fmt.Sprintf("cpuset.cpus=%s", cpus),

		"--id",
		id,

		"--exec-file",
		firecrackerBin,

		"--",

		"--api-sock",
		FirecrackerSocketName,
	)

	if enableOutput {
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
	}

	if enableInput {
		cmd.Stdin = os.Stdin
	} else {
		// Don't forward CTRL-C etc. signals from parent to child process
		// We can't enable this if we set the cmd stdin or we deadlock
		cmd.SysProcAttr = &unix.SysProcAttr{
			Setpgid: true,
			Pgid:    0,
		}
	}

	if err := cmd.Start(); err != nil {
		panic(errors.Join(ErrCouldNotStartFirecrackerServer, err))
	}

	server.VMPid = cmd.Process.Pid

	// `closed` records that `Close` asked for the process to be killed; `closeLock` guards it
	var closeLock sync.Mutex
	closed := false

	// We can only run this once since `cmd.Wait()` releases resources after the first call
	server.Wait = sync.OnceValue(func() error {
		if err := cmd.Wait(); err != nil {
			closeLock.Lock()
			defer closeLock.Unlock()

			if closed && (err.Error() == errSignalKilled.Error()) { // Don't treat killed errors as errors if we killed the process
				return nil
			}

			return errors.Join(ErrFirecrackerExited, err)
		}

		return nil
	})

	// Install the real `Close` _before_ any goroutine that might call it is started. The context
	// cancellation goroutine below reads `server.Close`; assigning it only after the socket has
	// shown up would race with that read and could make the goroutine call the no-op placeholder
	// if `ctx` is cancelled while we are still waiting for the socket.
	server.Close = func() error {
		if cmd.Process != nil {
			// The lock must be released before calling `server.Wait()` below, since the wait
			// function takes the same lock - so no `defer` here
			closeLock.Lock()

			// We can't trust `cmd.Process != nil` - only ever try to kill the process once
			if !closed {
				closed = true

				// The process might already have exited on its own or have been killed by
				// `exec.CommandContext` when `ctx` was cancelled. `Kill()` then reports
				// `os.ErrProcessDone`, which is not a failure to close - `server.Wait()`
				// below still reports how the process actually exited.
				if err := cmd.Process.Kill(); err != nil && !errors.Is(err, os.ErrProcessDone) {
					closeLock.Unlock()

					return err
				}
			}

			closeLock.Unlock()
		}

		return server.Wait()
	}

	// It is safe to start a background goroutine here since we return a wait function
	// Despite returning a wait function, we still need to start this goroutine however so that any errors
	// we get as we're polling the socket path directory are caught
	// It's important that we start this _after_ calling `cmd.Start`, otherwise our process would be nil
	goroutineManager.StartBackgroundGoroutine(func(_ context.Context) {
		if err := server.Wait(); err != nil {
			panic(errors.Join(ErrCouldNotWaitForFirecracker, err))
		}
	})

	goroutineManager.StartForegroundGoroutine(func(_ context.Context) {
		// Cause the `range Watcher.Event` loop to break if context is cancelled, e.g. when command errors
		<-goroutineManager.Context().Done()

		if err := watcher.Close(); err != nil {
			panic(errors.Join(ErrCouldNotCloseWatcher, err))
		}
	})

	// If the context is cancelled, shut down the server
	goroutineManager.StartBackgroundGoroutine(func(_ context.Context) {
		<-ctx.Done() // We use ctx, not goroutineManager.Context() here since this resource outlives the function call

		if err := server.Close(); err != nil {
			panic(errors.Join(ErrCouldNotCloseServer, err))
		}
	})

	// Block until the API socket has been created or the watcher has been closed
	socketCreated := false

	socketPath := filepath.Join(server.VMPath, FirecrackerSocketName)
	for ev := range watcher.Event {
		if filepath.Clean(ev.Name) == filepath.Clean(socketPath) {
			socketCreated = true

			break
		}
	}

	if !socketCreated {
		panic(ErrNoSocketCreated)
	}

	return
}