Refactor auto-boot-ollama-host script into modular structure with separate configuration, network, SSH, and service management modules for improved maintainability and readability
This commit is contained in:
@@ -7,204 +7,72 @@
|
||||
|
||||
local socket = require("socket")
|
||||
|
||||
--- Read an environment variable, falling back to a default.
-- An unset variable and an empty string are both treated as "not provided".
-- @param name  environment variable name
-- @param def   value returned when the variable is unset or empty
-- @return the variable's value, or `def`
local function getenv(name, def)
  local value = os.getenv(name)
  if value == nil or value == "" then
    return def
  end
  return value
end
|
||||
-- Import modules
|
||||
local config = require("config")
|
||||
local utils = require("utils")
|
||||
local network = require("network")
|
||||
local ssh = require("ssh")
|
||||
local ollama_manager = require("ollama_manager")
|
||||
|
||||
-- Check if a TCP port is accepting connections within a timeout (seconds)
|
||||
local function port_is_up(host, port, timeout_sec)
|
||||
host = tostring(host or "127.0.0.1")
|
||||
port = tonumber(port or 0) or 0
|
||||
local timeout = tonumber(timeout_sec or 1) or 1
|
||||
if port <= 0 then return false end
|
||||
-- Handle error pattern detection and recovery
|
||||
local function handle_error_pattern(config, powered_on)
|
||||
utils.log(("Detected EHOSTUNREACH for Ollama (%s:%d)."):format(config.OLLAMA_HOST, config.OLLAMA_PORT))
|
||||
|
||||
local deadline = socket.gettime() + timeout
|
||||
while socket.gettime() < deadline do
|
||||
local tcp = socket.tcp()
|
||||
if not tcp then return false end
|
||||
tcp:settimeout(1)
|
||||
local ok = tcp:connect(host, port)
|
||||
tcp:close()
|
||||
if ok then return true end
|
||||
socket.sleep(0.5)
|
||||
end
|
||||
return false
|
||||
end
|
||||
|
||||
-- ---- Config via env ----
-- Every setting is read once at load time through getenv(), so an unset or
-- empty variable yields the default given here.
-- Numeric settings additionally fall back to their default when the variable
-- holds a non-numeric value: tonumber() would otherwise return nil and the
-- "%d" conversions below (and in later log messages) would raise.
local CONTAINER_NAME = getenv("CONTAINER_NAME", "paperless-ai")
local SINCE = getenv("SINCE", "0s")
local OLLAMA_HOST = getenv("OLLAMA_HOST", "192.168.222.12")
local OLLAMA_PORT = tonumber(getenv("OLLAMA_PORT", "11434")) or 11434
local SSH_PORT = tonumber(getenv("SSH_PORT", "22")) or 22
local SSH_USER = getenv("SSH_USER", "user")
local SSH_IDENTITY_FILE = getenv("SSH_IDENTITY_FILE", "/root/.ssh/id_rsa") -- e.g. "/path/to/id_rsa"
-- Default error pattern embeds the configured host and port.
local ERROR_PATTERN = getenv(
  "ERROR_PATTERN",
  ("[ERROR] Document analysis failed: connect EHOSTUNREACH %s:%d"):format(OLLAMA_HOST, OLLAMA_PORT)
)
local FINISH_PATTERN = getenv("FINISH_PATTERN", "[DEBUG] Finished fetching. Found 0 documents.") -- e.g. "Server started"

-- Optional Wake-on-LAN
local WOL_MAC = getenv("WOL_MAC", "") -- e.g. "AA:BB:CC:DD:EE:FF"
local WOL_BCAST = getenv("WOL_BCAST", "192.168.222.255")
local WOL_PORT = tonumber(getenv("WOL_PORT", "9")) or 9

-- Optional: wait for service to come up (kept commented to stay minimal)
-- local UP_WAIT_TIMEOUT = tonumber(getenv("UP_WAIT_TIMEOUT", "90"))
|
||||
|
||||
--- Write one timestamped line to stdout and flush it.
-- The line is the os.date("[%F %T] ") prefix, then `msg`, then a newline.
-- @param msg  text to print after the timestamp prefix
local function log(msg)
  local out = io.stdout
  local stamp = os.date("[%F %T] ")
  out:write(stamp, msg, "\n")
  out:flush()
end
|
||||
|
||||
-- "AA:BB:CC:DD:EE:FF" -> 6 bytes
|
||||
--- Convert a textual MAC address into its 6-byte binary form.
-- Each run of two adjacent hex digits in the input is taken as one byte, so
-- "AA:BB:CC:DD:EE:FF", "AA-BB-CC-DD-EE-FF" and "AABBCCDDEEFF" are all accepted.
-- @param mac  MAC address string
-- @return 6-byte string, or nil when `mac` is not a string or does not
--         contain exactly six hex pairs
local function mac_to_bytes(mac)
  -- A missing or non-string value is reported like any other invalid MAC
  -- instead of raising on the method call below.
  if type(mac) ~= "string" then return nil end

  local bytes = {}
  for pair in mac:gmatch("%x%x") do
    bytes[#bytes + 1] = tonumber(pair, 16)
  end
  if #bytes ~= 6 then return nil end

  -- table.unpack exists from Lua 5.2 on; Lua 5.1 / LuaJIT only provide the
  -- global unpack.
  local unpack_fn = table.unpack or unpack
  return string.char(unpack_fn(bytes))
end
|
||||
|
||||
--- Send a Wake-on-LAN magic packet as a UDP broadcast.
-- The packet is 6 bytes of 0xFF followed by the target MAC repeated 16 times.
-- Socket-level failures are returned to the caller rather than raised, and the
-- socket is closed on every path once it has been created.
-- @param mac_str   MAC address, e.g. "AA:BB:CC:DD:EE:FF"
-- @param bcast_ip  IPv4 broadcast address to send to
-- @param port      UDP destination port
-- @return true on success; false plus an error message otherwise
local function send_wol(mac_str, bcast_ip, port)
  -- Build magic packet (MAC parsing is shared with mac_to_bytes)
  local mac = mac_to_bytes(mac_str)
  if not mac then return false, "invalid MAC" end
  local packet = string.rep(string.char(0xFF), 6) .. mac:rep(16)

  -- Create IPv4 UDP socket (udp4 if available)
  local udp, err = (socket.udp4 or socket.udp)()
  if not udp then return false, err end
  udp:settimeout(2)

  -- Each step runs only if the previous one succeeded; the first failure's
  -- message is what gets returned.
  local ok
  ok, err = udp:setsockname("0.0.0.0", 0) -- bind to IPv4 wildcard to force IPv4 family
  if ok then
    ok, err = udp:setoption("broadcast", true) -- allow broadcast
  end
  if ok then
    ok, err = udp:sendto(packet, bcast_ip, port)
  end

  udp:close()
  return ok ~= nil, err
end
|
||||
|
||||
-- Execute a remote command over SSH.
|
||||
-- Signature must remain: ssh(command, user, host, port, identity_file)
|
||||
local function ssh(command, user, host, port, identity_file)
|
||||
-- Basic validation and defaults
|
||||
user = tostring(user or "")
|
||||
host = tostring(host or "")
|
||||
port = tonumber(port or 22) or 22
|
||||
identity_file = tostring(identity_file or "")
|
||||
|
||||
-- Quote a string for safe single-quoted POSIX shell context
|
||||
local function sq(s)
|
||||
-- Replace ' with: '\'' (close, escape quote, reopen)
|
||||
return "'" .. tostring(s):gsub("'", "'\\''") .. "'"
|
||||
-- Send Wake-on-LAN if configured
|
||||
if config.WOL_MAC ~= "" then
|
||||
utils.log(("Sending WOL to %s via %s:%d"):format(config.WOL_MAC, config.WOL_BCAST, config.WOL_PORT))
|
||||
local ok, err = network.send_wol(config.WOL_MAC, config.WOL_BCAST, config.WOL_PORT)
|
||||
if ok then
|
||||
powered_on = true
|
||||
utils.log(("Successfully sent WOL to %s via %s:%d"):format(config.WOL_MAC, config.WOL_BCAST, config.WOL_PORT))
|
||||
else
|
||||
utils.log("WOL failed: " .. tostring(err))
|
||||
end
|
||||
end
|
||||
|
||||
-- Build base ssh command (run locally)
|
||||
-- -oBatchMode to avoid interactive prompts
|
||||
-- -oConnectTimeout for faster failure
|
||||
-- -oStrictHostKeyChecking uses known_hosts; adjust if needed
|
||||
local dest = (user ~= "" and (user .. "@" .. host) or host)
|
||||
local pieces = {
|
||||
"ssh",
|
||||
"-p", tostring(port),
|
||||
"-o", "BatchMode=yes",
|
||||
"-o", "ConnectTimeout=30",
|
||||
"-o", "ServerAliveInterval=5",
|
||||
"-o", "ServerAliveCountMax=1",
|
||||
"-o", "UserKnownHostsFile=/root/.ssh/known_hosts",
|
||||
"-o", "StrictHostKeyChecking=yes",
|
||||
}
|
||||
if identity_file ~= "" then
|
||||
table.insert(pieces, "-i"); table.insert(pieces, identity_file)
|
||||
end
|
||||
table.insert(pieces, dest)
|
||||
|
||||
-- Pass remote command as provided; caller is responsible for proper quoting.
|
||||
table.insert(pieces, "--")
|
||||
table.insert(pieces, command)
|
||||
|
||||
-- Join with spaces for os.execute
|
||||
local function join(args)
|
||||
-- We only quote the remote command explicitly. Other args are simple tokens.
|
||||
return table.concat(args, " ")
|
||||
end
|
||||
|
||||
local full = join(pieces)
|
||||
log("SSH exec: " .. full)
|
||||
local ok, reason, code = os.execute(full)
|
||||
if ok == true or ok == 0 then
|
||||
log("SSH command completed successfully")
|
||||
return true
|
||||
else
|
||||
local msg = string.format("SSH failed: reason=%s code=%s", tostring(reason), tostring(code))
|
||||
log(msg)
|
||||
return false, msg
|
||||
-- Wait for SSH and start service
|
||||
utils.log("Waiting for SSH to become reachable...")
|
||||
if network.port_is_up(config.OLLAMA_HOST, config.SSH_PORT, 60) then
|
||||
ollama_manager.start_service(config)
|
||||
end
|
||||
end
|
||||
|
||||
-- Handle finish pattern detection and shutdown
|
||||
--- React to the finish pattern having been seen in the watched log.
-- Logs the configured pattern through utils.log, then hands the same
-- settings object to ollama_manager.stop_service_and_shutdown.
-- @param config  settings object; FINISH_PATTERN is read here and the whole
--                object is passed on unchanged
local function handle_finish_pattern(config)
  local message = string.format("Detected finish pattern: %q", config.FINISH_PATTERN)
  utils.log(message)
  ollama_manager.stop_service_and_shutdown(config)
end
|
||||
|
||||
-- Main application logic
|
||||
local function main()
|
||||
log(("Watching container='%s' since='%s'"):format(CONTAINER_NAME, SINCE))
|
||||
log(("Looking for pattern: %q"):format(ERROR_PATTERN))
|
||||
utils.log(("Watching container='%s' since='%s'"):format(config.CONTAINER_NAME, config.SINCE))
|
||||
utils.log(("Looking for pattern: %q"):format(config.ERROR_PATTERN))
|
||||
|
||||
local cmd = ("docker logs -f --since %q %q 2>&1"):format(SINCE, CONTAINER_NAME)
|
||||
local cmd = ("docker logs -f --since %q %q 2>&1"):format(config.SINCE, config.CONTAINER_NAME)
|
||||
local powered_on = false
|
||||
|
||||
while true do
|
||||
|
||||
local fh = assert(io.popen(cmd, "r"))
|
||||
|
||||
for line in fh:lines() do
|
||||
-- Plain substring match (no regex)
|
||||
if line:find(ERROR_PATTERN, 1, true) ~= nil then
|
||||
log(("Detected EHOSTUNREACH for Ollama (%s:%d)."):format(OLLAMA_HOST, OLLAMA_PORT))
|
||||
|
||||
if WOL_MAC ~= "" then
|
||||
log(("Sending WOL to %s via %s:%d"):format(WOL_MAC, WOL_BCAST, WOL_PORT))
|
||||
local ok, err = send_wol(WOL_MAC, WOL_BCAST, WOL_PORT)
|
||||
if ok then
|
||||
powered_on = true
|
||||
log(("Sucessfully sent WOL to %s via %s:%d"):format(WOL_MAC, WOL_BCAST, WOL_PORT))
|
||||
else
|
||||
log("WOL failed: " .. tostring(err))
|
||||
end
|
||||
end
|
||||
|
||||
log("Waiting for SSH to become reachable...")
|
||||
|
||||
if port_is_up(OLLAMA_HOST, SSH_PORT, 60) then
|
||||
log("SSH is reachable. Starting ollama service...")
|
||||
socket.sleep(5)
|
||||
ssh("nssm start ollama", SSH_USER, OLLAMA_HOST, SSH_PORT, SSH_IDENTITY_FILE)
|
||||
--ssh('wsl.exe -d Debian -- sudo systemctl enable ollama', SSH_USER, OLLAMA_HOST, SSH_PORT, SSH_IDENTITY_FILE)
|
||||
--ssh('wsl.exe -d Debian -- sudo systemctl start ollama', SSH_USER, OLLAMA_HOST, SSH_PORT, SSH_IDENTITY_FILE)
|
||||
if (port_is_up(OLLAMA_HOST, OLLAMA_PORT, 90)) then
|
||||
log("Ollama service is reachable again.")
|
||||
socket.sleep(30)
|
||||
break
|
||||
else
|
||||
log("Timeout waiting for Ollama service to come up after SSH command.")
|
||||
end
|
||||
end
|
||||
-- Handle error pattern detection
|
||||
if line:find(config.ERROR_PATTERN, 1, true) ~= nil then
|
||||
handle_error_pattern(config, powered_on)
|
||||
powered_on = true
|
||||
end
|
||||
|
||||
if line:find(FINISH_PATTERN, 1, true) ~= nil and powered_on == true then
|
||||
log(("Detected finish pattern: %q"):format(FINISH_PATTERN))
|
||||
log("Shutting down Ollama host to save power...")
|
||||
ssh("nssm stop ollama", SSH_USER, OLLAMA_HOST, SSH_PORT, SSH_IDENTITY_FILE)
|
||||
--ssh('wsl.exe -d Debian -- sudo systemctl disable ollama', SSH_USER, OLLAMA_HOST, SSH_PORT, SSH_IDENTITY_FILE)
|
||||
--ssh('wsl.exe -d Debian -- sudo systemctl stop ollama', SSH_USER, OLLAMA_HOST, SSH_PORT, SSH_IDENTITY_FILE)
|
||||
ssh("shutdown.exe /s /t 0", SSH_USER, OLLAMA_HOST, SSH_PORT, SSH_IDENTITY_FILE)
|
||||
socket.sleep(5)
|
||||
-- Handle finish pattern detection
|
||||
if line:find(config.FINISH_PATTERN, 1, true) ~= nil and powered_on == true then
|
||||
handle_finish_pattern(config)
|
||||
powered_on = false
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
fh:close()
|
||||
log("Restarting log watch loop...")
|
||||
utils.log("Restarting log watch loop...")
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
-- Run the application
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user