TODO: 'cloudron' in option names is redundant and duplicates our UI text 'services'; reorder the help output to be alphabetical; change the CLI arguments to their plural forms.
937 lines
36 KiB
Bash
Executable File
937 lines
36 KiB
Bash
Executable File
#!/bin/bash

# cloudron-support: support and diagnostics helper for a Cloudron server.
# Abort on errors, unset variables and failed pipeline stages.
set -eu -o pipefail

# scripts requires root
if [[ ${EUID} -ne 0 ]]; then
    echo "This script should be run as root. Run with sudo"
    exit 1
fi

# ANSI escape sequences used by the log helpers (success/info/warn/fail)
readonly RED='\033[31m'
readonly GREEN='\033[32m'
readonly YELLOW='\033[33m'
readonly BOLD='\033[1m'
readonly DONE='\033[m' # reset attributes

# diagnostics are uploaded here (see send_diagnostics)
readonly PASTEBIN="https://paste.cloudron.io"
# section separator used in the diagnostics log file
readonly LINE="\n========================================================\n"
|
|
# Help text. Options are kept in alphabetical order (--help stays last by
# convention); previously --enable-remote-support was listed before
# --disable-remote-support.
readonly HELP_MESSAGE="
Cloudron Support and Diagnostics Tool

See https://docs.cloudron.io/troubleshooting for more information on troubleshooting.

Options:
  --apply-db-migrations         Applies all pending DB migrations
  --check-db-migrations         Checks if the DB migrations are up to date
  --check-services              Checks if services/addons are running and healthy.
  --disable-dnssec              Disable DNSSEC
  --disable-remote-support      Disable SSH Remote Access for the Cloudron support team
  --enable-remote-support       Enable SSH Remote Access for the Cloudron support team
  --fix-docker-version          Ensures the correct docker version is installed
  --owner-login                 Login as owner
  --patch                       Apply a patch from git. WARNING: Do not use unless you know what you are doing!
  --recreate-containers         Deletes all existing containers and recreates them without loss of data
  --recreate-docker             Deletes docker storage (containers and images) and recreates it without loss of data
  --send-diagnostics            Collects server diagnostics and uploads it to ${PASTEBIN}
  --troubleshoot                Dashboard down? Run tests to identify the potential problem
  --unbound-use-external-dns    Forwards all Unbound requests to Google (8.8.8.8) and Cloudflare (1.1.1.1) DNS servers.
                                Unbound is the internal DNS server used for recursive DNS queries. This is only needed
                                if your network does not allow outbound DNS requests.
  --help                        Show this message
"
|
|
|
|
# Print a success line: green "[OK]" tag, a tab, then the message.
function success() {
    local -r message="$1"
    echo -e "[${GREEN}OK${DONE}]\t${message}"
}
|
|
|
|
# Print an indented informational line (no status tag).
function info() {
    local -r message="$1"
    echo -e "\t${message}"
}
|
|
|
|
# Print a warning line: yellow "[WARN]" tag, a tab, then the message.
function warn() {
    local -r message="$1"
    echo -e "[${YELLOW}WARN${DONE}]\t${message}"
}
|
|
|
|
# Print a failure line (red "[FAIL]" tag) to stderr.
function fail() {
    local -r message="$1"
    echo -e "[${RED}FAIL${DONE}]\t${message}" >&2
}
|
|
|
|
# Enable SSH access for the Cloudron support team by appending their public
# key to the cloudron-support user's authorized_keys. Shows a disclaimer
# first and exits non-zero if the operator declines.
function enable_remote_support() {
    local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r ssh_user="cloudron-support"
    local -r keys_file="/home/cloudron-support/.ssh/authorized_keys"

    echo -e "
================= ${BOLD}SSH ACCESS DISCLAIMER${DONE} =================

By granting us SSH or remote access to your systems, you acknowledge and
agree to the following:

1. ${BOLD}Access to Customer Data${DONE}
Our team may have visibility into customer data during the course of
investigating or resolving issues. While we take all reasonable steps to
respect your privacy and handle data securely, you acknowledge that such
access may occur as part of the support process.

2. ${BOLD}No Liability for Data Loss or System Changes${DONE}
Although we strive to exercise caution and due diligence, you acknowledge
and accept that:
${BOLD}-${DONE} There is an inherent risk of data loss, corruption, or system
disruption during troubleshooting or configuration changes.
${BOLD}-${DONE} We shall not be held liable for any loss of data, service
downtime, or unintended consequences arising from our access or any
actions taken during the support process.

3. ${BOLD}Backups and Safeguards${DONE}
You are solely responsible for ensuring that up-to-date and complete
backups of your systems and data exist prior to granting us access.

4. ${BOLD}Local Changes and Auto-Updates${DONE}
Your system may receive automatic updates as part of regular maintenance or
feature releases. Any local modifications or patches applied during support
may be overwritten by future updates. ${BOLD}-${DONE} It is the customer's
responsibility to track such changes and reapply them if necessary, or to
coordinate with us for permanent integration where applicable.

5. ${BOLD}Consent to Proceed${DONE}
By providing access, you confirm that you have read, understood, and agreed
to the terms above and expressly authorize us to proceed with accessing
your systems for support purposes.

=======================================================================
"
    read -p "Do you accept these terms? [y/N] " choice
    choice=${choice:-n}

    [[ ! $choice =~ ^[Yy]$ ]] && exit 1

    echo -n "Terms accepted. Enabling Remote Access for the Cloudron support team..."
    mkdir -p "$(dirname "${keys_file}")" # .ssh does not exist sometimes (quoted: avoid word-splitting)
    touch "${keys_file}" # required for concat to work
    # idempotent: only append the key if it is not already present
    if ! grep -q "${cloudron_support_public_key}" "${keys_file}"; then
        echo -e "\n${cloudron_support_public_key}" >> "${keys_file}"
        chmod 600 "${keys_file}"
        chown "${ssh_user}" "${keys_file}"
    fi

    echo "Done"
}
|
|
|
|
# Remove the Cloudron support team's public key from the cloudron-support
# user's authorized_keys, revoking their SSH access. Safe to run even if
# the key was never installed.
function disable_remote_support() {
    local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r keys_file="/home/cloudron-support/.ssh/authorized_keys"

    echo -n "Disabling Remote Access for the Cloudron support team..."
    mkdir -p "$(dirname "${keys_file}")" # .ssh does not exist sometimes (quoted: avoid word-splitting)
    touch "${keys_file}" # required for del below to work
    if grep -q "${cloudron_support_public_key}" "${keys_file}"; then
        sed "/${cloudron_support_public_key}/d" -i "${keys_file}"
    fi

    echo "Done"
}
|
|
|
|
# Return 0 once the given systemd unit has been continuously active for more
# than 10 seconds. Retries up to 3 times (sleeping between attempts) and
# returns 1 if the unit keeps restarting.
function wait_systemd_service() {
    local -r unit="$1"
    local active_since start_secs now_secs uptime_secs

    for _attempt in {1..3}; do
        active_since=$(systemctl show "${unit}" -p ActiveEnterTimestamp | sed 's/ActiveEnterTimestamp=//g')
        start_secs=$(date '+%s' --date="${active_since}")
        now_secs=$(date '+%s')

        uptime_secs=$(( now_secs - start_secs ))
        (( uptime_secs > 10 )) && return 0

        info "Service '${unit}' just started $uptime_secs secs ago, checking health again in 10s"
        sleep 11
    done

    return 1
}
|
|
|
|
# Ensure the host MySQL server is running. Attempts a single restart if it
# is down, and exits the whole script if it cannot be brought up or keeps
# crash-looping.
function check_host_mysql() {
    if ! systemctl is-active -q mysql; then
        info "MySQL is down. Trying to restart MySQL ..."

        systemctl restart mysql

        if ! systemctl is-active -q mysql; then
            fail "MySQL is still down, please investigate the error by inspecting /var/log/mysql/error.log"
            exit 1
        fi
    fi

    # guard against crash-loops: the unit must stay active for a while
    if ! wait_systemd_service mysql; then
        fail "MySQL keeps restarting, please investigate the error by inspecting /var/log/mysql/error.log"
        exit 1
    fi

    success "MySQL is running"
}
|
|
|
|
# Ensure the box (Cloudron server) service is running. If down, re-runs the
# setup/migration script and restarts the unit; exits the script on failure.
function check_box() {
    # best-effort version for log messages
    [[ -f /home/yellowtent/box/VERSION ]] && version=$(cat /home/yellowtent/box/VERSION) || version='<unknown>'

    if ! systemctl is-active -q box; then
        info "box v${version} is down. re-running migration script and restarting it ..."

        /home/yellowtent/box/setup/start.sh
        systemctl stop box # a restart sometimes doesn't restart, no idea
        systemctl start box

        if ! systemctl is-active -q box; then
            fail "box service is still down, please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log"
            exit 1
        fi
    fi

    # guard against crash-loops: the unit must stay active for a while
    if ! wait_systemd_service box; then
        fail "box service keeps restarting, please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log"
        exit 1
    fi

    success "box v${version} is running"
}
|
|
|
|
# Verify that netplan can render its configuration. An empty configuration
# is only a warning, since some setups manage networking outside netplan.
function check_netplan() {
    local rendered
    if ! rendered=$(netplan get all 2>/dev/null); then
        fail "netplan is not working"
        exit 1
    fi

    if [[ -n "${rendered}" ]]; then
        success "netplan is good"
    else
        warn "netplan configuration is empty. this might be OK depending on your networking setup"
    fi
}
|
|
|
|
# Create a one-time "ghost" login for the Cloudron owner account and print
# the credentials. Writes a generated password into the ghosts_config
# setting, which the box code accepts for a single login for that user.
function owner_login() {
    check_host_mysql >/dev/null # mysql must be up to query/update settings

    # oldest active owner account with a username
    local -r owner_username=$(mysql -NB -uroot -ppassword -e "SELECT username FROM box.users WHERE role='owner' AND username IS NOT NULL AND active=1 ORDER BY creationTime LIMIT 1" 2>/dev/null)
    local -r owner_password=$(pwgen -1s 12)
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    # upsert ghosts_config with a {username: password} JSON object
    mysql -NB -uroot -ppassword -e "INSERT INTO box.settings (name, value) VALUES ('ghosts_config', '{\"${owner_username}\":\"${owner_password}\"}') ON DUPLICATE KEY UPDATE name='ghosts_config', value='{\"${owner_username}\":\"${owner_password}\"}'" 2>/dev/null
    echo "Login at https://my.${dashboard_domain} as ${owner_username} / ${owner_password} . This password may only be used once."
}
|
|
|
|
# Collect system and Cloudron diagnostics into a log file and upload it to
# the pastebin; prints the resulting link for the operator to email to
# support. Individual collectors are best-effort: a failing command records
# a note in the log instead of aborting the whole collection.
function send_diagnostics() {
    local -r log="/tmp/cloudron-support.log"

    echo -n "Generating Cloudron Support stats..."

    rm -rf "$log"

    echo -e $LINE"Linux"$LINE >> "$log"
    uname -nar &>> "$log"

    echo -e $LINE"Ubuntu"$LINE >> "$log"
    lsb_release -a &>> "$log"

    echo -e $LINE"Cloudron"$LINE >> "$log"
    cloudron_version=$(cat /home/yellowtent/box/VERSION || true)
    echo -e "Cloudron version: ${cloudron_version}" >> "$log"
    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null || true)
    echo -e "Dashboard domain: ${dashboard_domain}" >> "$log"

    echo -e $LINE"Docker"$LINE >> "$log"
    # &>> already captures stderr; the previous trailing 2>&1 was redundant
    if ! timeout --kill-after 10s 15s docker system info &>> "$log"; then
        echo -e "Docker (system info) is not responding" >> "$log"
    fi

    if ! timeout --kill-after 10s 15s docker ps -a &>> "$log"; then
        echo -e "Docker (ps) is not responding" >> "$log"
    fi

    echo -e $LINE"Filesystem stats"$LINE >> "$log"
    if ! timeout --kill-after 10s 15s df -h &>> "$log"; then
        echo -e "df is not responding" >> "$log"
    fi

    echo -e $LINE"Appsdata stats"$LINE >> "$log"
    du -hcsL /home/yellowtent/appsdata/* &>> "$log" || true

    echo -e $LINE"Boxdata stats"$LINE >> "$log"
    du -hcsL /home/yellowtent/boxdata/* &>> "$log" || true

    echo -e $LINE"Backup stats (possibly misleading)"$LINE >> "$log"
    du -hcsL /var/backups/* &>> "$log" || true

    echo -e $LINE"System daemon status"$LINE >> "$log"
    # 'systemctl status' exits non-zero when any unit is inactive; under
    # set -e that would abort diagnostics collection, so ignore the status
    systemctl status --lines=100 --no-pager --full box mysql unbound cloudron-syslog nginx docker &>> "$log" || true

    echo -e $LINE"Box logs"$LINE >> "$log"
    tail -n 100 /home/yellowtent/platformdata/logs/box.log &>> "$log"

    echo -e $LINE"Interface Info"$LINE >> "$log"
    ip addr &>> "$log"

    echo -e $LINE"Firewall chains"$LINE >> "$log"
    iptables -L &>> "$log"
    has_ipv6=$(cat /proc/net/if_inet6 >/dev/null 2>&1 && echo "yes" || echo "no")
    echo -e "IPv6: ${has_ipv6}" >> "$log"
    [[ "${has_ipv6}" == "yes" ]] && ip6tables -L &>> "$log"

    echo "Done"

    echo -n "Uploading information..."
    paste_key=$(curl -X POST ${PASTEBIN}/documents --silent --data-binary "@$log" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])")
    echo "Done"

    echo -e "\nPlease email the following link to support@cloudron.io : ${PASTEBIN}/${paste_key}"
}
|
|
|
|
# Verify host-level DNS resolution (normally via systemd-resolved). On
# failure, print warnings about the usual misconfigurations before exiting.
function check_dns() {
    if host cloudron.io &>/dev/null; then
        success "DNS is resolving via systemd-resolved"
        return
    fi

    # resolution failed - diagnose the common suspects below
    if ! systemctl is-active -q systemd-resolved; then
        warn "systemd-resolved is not in use. see 'systemctl status systemd-resolved'"
    fi

    if [[ -L /etc/resolv.conf ]]; then
        target=$(readlink /etc/resolv.conf)
        if [[ "$target" != *"/run/systemd/resolve/stub-resolv.conf" ]]; then
            warn "/etc/resolv.conf is symlinked to $target instead of '../run/systemd/resolve/stub-resolv.conf'"
        fi
    else
        warn "/etc/resolv.conf is not symlinked to '../run/systemd/resolve/stub-resolv.conf'"
    fi

    # 127.0.0.53 is the systemd-resolved stub listener
    if ! grep -q "^nameserver 127.0.0.53" /etc/resolv.conf; then
        warn "/etc/resolv.conf is not using systemd-resolved. it is missing the line 'nameserver 127.0.0.53'"
    fi

    fail "DNS is not resolving"
    host cloudron.io || true # re-run unsilenced so the operator sees the actual error
    exit 1
}
|
|
|
|
# Ensure the local unbound resolver is running and able to resolve.
# Restarts it once if down; exits the script on persistent failure.
function check_unbound() {
    if ! systemctl is-active -q unbound; then
        info "unbound is down. restarting to see if it fixes it" # unbound-anchor is part of ExecStartPre
        systemctl restart unbound

        if ! systemctl is-active -q unbound; then
            fail "unbound is still down, please investigate the error using 'journalctl -u unbound'"
            exit 1
        fi
    fi

    # guard against crash-loops: the unit must stay active for a while
    if ! wait_systemd_service unbound; then
        fail "unbound service keeps restarting, please investigate the error using 'journalctl -u unbound'"
        exit 1
    fi

    # unbound listens on 127.0.0.150 (queried directly here)
    if ! host cloudron.io 127.0.0.150 &>/dev/null; then
        fail "Unbound is not resolving, maybe try forwarding all DNS requests. You can do this by running 'cloudron-support --unbound-use-external-dns' option"
        host cloudron.io 127.0.0.150
        exit 1
    fi

    success "unbound is running"
}
|
|
|
|
# Check that the dashboard's TLS certificate (as referenced by the nginx
# config) has not expired; prints renewal-log hints and exits if it has.
function check_dashboard_cert() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r nginx_conf_file="/home/yellowtent/platformdata/nginx/applications/dashboard/my.${dashboard_domain}.conf"
    # extract the ssl_certificate path from the nginx config
    local -r cert_file=$(sed -n -e 's/.*ssl_certificate [[:space:]]\+\(.*\);/\1/p' "${nginx_conf_file}")

    local -r cert_expiry_date=$(openssl x509 -enddate -noout -in "${cert_file}" | sed -e 's/notAfter=//')

    # -checkend 100: non-zero if the cert expires within the next 100 seconds
    if ! openssl x509 -checkend 100 -noout -in "${cert_file}" >/dev/null 2>&1; then
        fail "Certificate has expired. Certificate expired at ${cert_expiry_date}"

        # point the operator at the latest cert renewal task log
        local -r task_id=$(mysql -NB -uroot -ppassword -e "SELECT id FROM box.tasks WHERE type='checkCerts' ORDER BY id DESC LIMIT 1" 2>/dev/null)
        echo -e "\tPlease check /home/yellowtent/platformdata/logs/tasks/${task_id}.log for last cert renewal logs"
        echo -e "\tCommon issues include expiry of domain's API key OR incoming http port 80 not being open"
        exit 1
    fi

    success "dashboard cert is valid"
}
|
|
|
|
# Ensure nginx is running and reloadable. If a reload fails, clean up known
# bad config states (stale dashboard configs, configs whose certs/keys are
# missing) and restart nginx; exits the script if it still cannot start.
function check_nginx() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)

    # it is possible nginx is running but can't be restarted
    if ! systemctl reload -q nginx; then
        fail "nginx is down. Removing extraneous dashboard domain configs ..."

        # we had a bug where old dashboard domain config file was kept around
        cd /home/yellowtent/platformdata/nginx/applications/dashboard/ && find . ! -name "my.${dashboard_domain}.conf" -type f -exec rm -f {} +

        # check if certificates are there. nginx will still start if certs are expired
        # IFS= makes sure it doesn't trim leading and trailing whitespace
        # -r prevents interpretation of \ escapes.
        find /home/yellowtent/platformdata/nginx -type f -name '*.conf' -print0 | while IFS= read -r -d '' conf; do
            # paths referenced by the ssl_certificate/ssl_certificate_key directives
            cert_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate[[:blank:]]\+\(.*\);/\1/p' "${conf}")
            key_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate_key[[:blank:]]\+\(.*\);/\1/p' "${conf}")

            if [[ -n "${cert_file}" && ! -f "${cert_file}" ]]; then
                info "${cert_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi

            if [[ -n "${key_file}" && ! -f "${key_file}" ]]; then
                info "${key_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi
        done

        systemctl restart nginx

        if ! systemctl is-active -q nginx; then
            fail "nginx is still down, please investigate the error by inspecting 'journalctl -u nginx' and /var/log/nginx/error.log"
            exit 1
        fi
    fi

    # guard against crash-loops: the unit must stay active for a while
    if ! wait_systemd_service nginx; then
        fail "nginx service keeps restarting, please investigate the error using 'journalctl -u nginx' and /var/log/nginx/error.log"
        exit 1
    fi

    success "nginx is running"
}
|
|
|
|
# this confirms that https works properly without any proxy (cloudflare) involved
|
|
# Fetch the dashboard over HTTPS pinned to 127.0.0.1, proving nginx and the
# certificate work locally without any proxy (e.g. cloudflare) in the path.
function check_dashboard_site_loopback() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)

    if curl --fail -s --resolve "my.${dashboard_domain}:443:127.0.0.1" "https://my.${dashboard_domain}" >/dev/null; then
        success "dashboard is reachable via loopback"
        return
    fi

    fail "Could not load dashboard website with loopback check"
    exit 1
}
|
|
|
|
# NOTE(review): check_node is defined twice in this file; this earlier
# definition is shadowed by the later one and should be deleted. Until then
# it is kept in sync with the later definition, which additionally guards
# against node not being installed at all.
# Compare the installed node version against the version pinned in the
# Cloudron installer script; prints fix instructions and exits on mismatch.
function check_node() {
    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    if command -v node &> /dev/null; then
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix
    else
        current_node_version="<not found>"
    fi

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo "  ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo "  ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        echo "  apt remove -y nodejs"
        echo "  systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}
|
|
|
|
# Print per-interface commands to disable IPv6 (instead of disabling it
# globally). The interface list is computed once instead of being duplicated
# in both loops.
function print_ipv6_disable_howto() {
    # skip loopback, container/VM bridges and other virtual-looking interfaces
    local -r ifaces=$(ls /sys/class/net | grep -vE '^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)')

    echo "Instead of disabling IPv6 globally, you can disable it at an interface level."
    for iface in ${ifaces}; do
        echo -e "\tsysctl -w net.ipv6.conf.${iface}.disable_ipv6=1"
    done

    echo "For the above configuration to persist across reboots, you have to add below to /etc/sysctl.conf"
    for iface in ${ifaces}; do
        echo -e "\tnet.ipv6.conf.${iface}.disable_ipv6=1"
    done
}
|
|
|
|
# Check IPv6 health: IPv6 must be enabled in the kernel (Cloudron requires
# it even without a public IPv6 address). If the server does have a public
# IPv6 address, also verify outbound IPv6 connectivity works.
function check_ipv6() {
    ipv6_disable=$(cat /sys/module/ipv6/parameters/disable)
    if [[ "${ipv6_disable}" == "1" ]]; then
        fail "IPv6 is disabled in kernel. Cloudron requires IPv6 in kernel"
        print_ipv6_disable_howto
        exit 1
    fi

    # check if server has IPv6 address
    has_ipv6_address=0
    for iface in $(ls /sys/class/net | grep -vE '^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)'); do
        # only global addresses count; link-local (fe80::) is filtered out
        if ipv6=$(ip -6 addr show dev ${iface} | grep -o 'inet6 [^ ]*' | awk '{print $2}' | grep -v '^fe80'); then
            [[ -n "${ipv6}" ]] && has_ipv6_address=1
        fi
    done

    if [[ "${has_ipv6_address}" == "0" ]]; then
        success "IPv6 is enabled in kernel. No public IPv6 address"
        return
    fi

    if ! ping6 -q -c 1 api.cloudron.io >/dev/null 2>&1; then
        fail "Server has an IPv6 address but api.cloudron.io is unreachable via IPv6 (ping6 -q -c 1 api.cloudron.io)"
        print_ipv6_disable_howto
        exit 1
    fi

    success "IPv6 is enabled and public IPv6 address is working"
}
|
|
|
|
# Ensure the docker daemon is running. Attempts a single restart if it is
# down, and exits the script if it stays down or keeps crash-looping.
function check_docker() {
    if ! systemctl is-active -q docker; then
        info "Docker is down. Trying to restart docker ..."
        systemctl restart docker

        if ! systemctl is-active -q docker; then
            fail "Docker is still down, please investigate the error using 'journalctl -u docker'"
            exit 1
        fi
    fi

    # guard against crash-loops: the unit must stay active for a while
    if ! wait_systemd_service docker; then
        fail "Docker keeps restarting, please investigate the error using 'journalctl -u docker'"
        exit 1
    fi

    success "docker is running"
}
|
|
|
|
# Compare the installed docker client version against the version pinned in
# the Cloudron installer script; exits with a hint if they differ.
function check_docker_version() {
    expected_docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    if ! command -v docker &> /dev/null; then
        current_docker_version="<not found>"
    else
        current_docker_version="$(docker version --format {{.Client.Version}})"
    fi

    if [[ "${current_docker_version}" == "${expected_docker_version}" ]]; then
        success "docker version is correct"
        return
    fi

    fail "docker version is incorrect. Expecting ${expected_docker_version}. Got ${current_docker_version}."
    echo "Run cloudron-support --fix-docker-version"
    exit 1
}
|
|
|
|
# Compare the installed node version against the version pinned in the
# Cloudron installer script; prints fix instructions and exits on mismatch.
# (This is the effective definition; an earlier duplicate exists above.)
function check_node() {
    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    if command -v node &> /dev/null; then
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix
    else
        current_node_version="<not found>"
    fi

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo "  ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo "  ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        echo "  systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}
|
|
|
|
# Check that the dashboard loads via its public domain name (through
# external DNS and any proxy) and that the response is actually the
# Cloudron dashboard. Exits with provider-specific hints on failure.
function check_dashboard_site_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r domain_provider=$(mysql -NB -uroot -ppassword -e "SELECT provider FROM box.domains WHERE domain='${dashboard_domain}'" 2>/dev/null)

    # TODO: check ipv4 and ipv6 separately
    if ! output=$(curl --fail --connect-timeout 10 --max-time 20 -s "https://my.${dashboard_domain}"); then
        fail "Could not load dashboard domain."
        if [[ "${domain_provider}" == "cloudflare" ]]; then
            echo "Maybe cloudflare proxying is not working. Delete the domain in Cloudflare dashboard and re-add it. This sometimes re-establishes the proxying"
        else
            echo "Hairpin NAT is not working. Please check if your router supports it"
        fi
        exit 1
    fi

    # $output is quoted: the unquoted expansion would glob and word-split the HTML
    if ! echo "$output" | grep -q "Cloudron Dashboard"; then
        fail "https://my.${dashboard_domain} is not the dashboard domain. Check if DNS is set properly to this server"
        host "my.${dashboard_domain}" 127.0.0.53 # could also result in cloudflare
        exit 1
    fi

    success "Dashboard is reachable via domain name"
}
|
|
|
|
# Best-effort check (via whois) that the dashboard's domain registration
# has not lapsed. Skipped when whois is missing or reports no expiry info.
function check_expired_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)

    if ! command -v whois &> /dev/null; then
        info "Domain ${dashboard_domain} expiry check skipped because whois is not installed. Run 'apt install whois' to check"
        return
    fi

    # registries phrase the expiry differently; take the last field of the
    # first matching line (grep -Ei replaces the deprecated egrep -i)
    local -r expdate=$(whois "${dashboard_domain}" | grep -Ei 'Expiration Date:|Expires on|Expiry Date:' | head -1 | awk '{print $NF}')
    if [[ -z "${expdate}" ]]; then
        warn "Domain ${dashboard_domain} expiry check skipped because whois does not have this information"
        return
    fi

    local -r expdate_secs=$(date -d"$expdate" +%s)
    local -r curdate_secs="$(date +%s)"

    if (( curdate_secs > expdate_secs )); then
        fail "Domain ${dashboard_domain} appears to be expired"
        exit 1
    fi

    success "Domain ${dashboard_domain} is valid and has not expired"
}
|
|
|
|
# Configure unbound to forward all queries to public resolvers (Cloudflare
# 1.1.1.1 and Google 8.8.8.8) instead of doing recursive resolution. Useful
# when the network blocks outbound recursive DNS.
function unbound_use_external_dns() {
    local -r conf_file="/etc/unbound/unbound.conf.d/forward-everything.conf"

    info "To remove the forwarding, please delete $conf_file and 'systemctl restart unbound'"

    # forward-zone "." matches every query
    cat > $conf_file <<EOF
forward-zone:
    name: "."
    forward-addr: 1.1.1.1
    forward-addr: 8.8.8.8
EOF

    systemctl restart unbound

    success "Forwarded all DNS requests to Google (8.8.8.8) & Cloudflare DNS (1.1.1.1)"
}
|
|
|
|
# Disable DNSSEC validation in unbound by enabling permissive mode (results
# are still fetched but validation failures are not fatal).
function disable_dnssec() {
    local -r conf_file="/etc/unbound/unbound.conf.d/disable-dnssec.conf"

    warn "To reenable DNSSEC, please delete $conf_file and 'systemctl restart unbound'"

    cat > $conf_file <<EOF
server:
    val-permissive-mode: yes
EOF

    systemctl restart unbound

    success "DNSSEC Disabled"
}
|
|
|
|
# Print a short hardware/OS summary (vendor, kernel, Ubuntu release, CPU,
# RAM, root disk) used as the header of a troubleshooting run.
function print_system() {
    vendor=$(cat /sys/devices/virtual/dmi/id/sys_vendor)
    product=$(cat /sys/devices/virtual/dmi/id/product_name)
    echo "Vendor: ${vendor} Product: ${product}"
    ubuntu_codename=$(lsb_release -cs)
    ubuntu_version=$(lsb_release -rs)
    linux_version=$(uname -r)
    echo "Linux: ${linux_version}"
    echo "Ubuntu: ${ubuntu_codename} ${ubuntu_version}"
    proc_count=$(grep -c ^processor /proc/cpuinfo)
    proc_name=$(lscpu | grep "Model name:" | sed -r 's/Model name:\s{1,}//g')
    echo "Processor: ${proc_name} x ${proc_count}"
    ram_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    echo "RAM: ${ram_kb}KB"
    disk_size=$(LC_ALL=C df -h --output=source,avail / | tail -n1)
    echo "Disk: ${disk_size}"
}
|
|
|
|
# Run the full battery of health checks, ordered from most basic
# (node/docker/mysql) to most external (domain reachability, unbound).
# Each check either prints success or exits the script with hints.
function troubleshoot() {
    # note: disk space test has already been run globally
    print_system
    check_node
    check_ipv6
    check_docker
    check_docker_version
    check_host_mysql
    check_nginx # requires mysql to be checked
    check_dashboard_cert
    check_dashboard_site_loopback # checks website via loopback
    check_db_migrations
    check_services
    check_box
    check_netplan
    check_dns
    check_dashboard_site_domain # check website via domain name
    check_expired_domain
    check_unbound # this is less fatal after 8.0
}
|
|
|
|
# Interactively reclaim disk space: truncate platform logs, prune unused
# docker images and clean stale temp files inside running containers.
function cleanup_disk_space() {
    read -p "Truncate log files to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        truncate -s0 /home/yellowtent/platformdata/logs/*/*.log
        rm -f /home/yellowtent/platformdata/logs/*.log.* # delete the log.1, log.2 etc
    fi

    read -p "Prune docker system resources to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        # bugfix: 'docker images prune' is not a valid subcommand; it is 'docker image prune'
        docker image prune -fa || true
    fi

    read -p "Prune docker volumes to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        # NOTE(review): this deletes day-old files under /tmp and /run inside
        # each running container; the prompt wording ("volumes") is misleading
        for container in $(docker ps --format "{{.ID}}"); do
            docker exec "$container" find /tmp -type f -mtime +1 -delete || true
            docker exec "$container" find /run -type f -mtime +1 -delete || true
        done
    fi
}
|
|
|
|
# Ensure minimal free disk space before doing any work: at least 10MB on /
# (offering interactive cleanup if short) and at least 5MB on /tmp for the
# diagnostics log file. Backticks replaced with $( ) command substitution.
function check_disk_space() {
    # check if at least 10mb root partition space is available
    if [[ "$(df --output="avail" / | sed -n 2p)" -lt "10240" ]]; then
        echo "No more space left on / (see df -h output)"
        cleanup_disk_space
    fi

    # re-check after the (optional) cleanup
    if [[ "$(df --output="avail" / | sed -n 2p)" -lt "10240" ]]; then
        echo "Still no space despite cleaning up. If you have backups (/var/backups) on this disk, delete old backups to free some space"
        exit 1
    fi

    # check for at least 5mb free /tmp space for the log file
    if [[ "$(df --output="avail" /tmp | sed -n 2p)" -lt "5120" ]]; then
        echo "Not enough space left on /tmp"
        echo "Free up some space first by deleting files from /tmp"
        exit 1
    fi
}
|
|
|
|
# Force re-creation of all addon/app containers: reset the recorded infra
# version so box re-creates everything on startup, restart box, then poll
# the box log until the platform reports ready.
# Expects the global ${logfile} to be set by the caller.
function do_recreate_containers() {
    echo -e "Re-creating addon and app containers\n"
    info "Follow re-create logs in a second terminal with:"
    info "$ tail -f ${logfile}"
    echo ""

    echo -n "This takes a while ."
    line_count=$(wc -l < "${logfile}") # only scan log lines written after this point (no 'cat | wc')
    # fake an old infra version so box re-creates everything on startup
    sed -e 's/"version": ".*",/"version":"48.0.0",/' -i /home/yellowtent/platformdata/INFRA_VERSION
    systemctl restart -q box # will re-create docker network

    while ! tail -n "+${line_count}" "${logfile}" | grep -q "platform is ready"; do
        echo -n "."
        sleep 2
    done

    echo -e "\n\nDone! Addon containers successfully re-created. The apps in the dashboard will"
    echo -e "say 'Configuring (Queued)'. They will come up in a short while.\n"
}
|
|
|
|
# Interactive wrapper: confirm with the operator, then re-create all
# containers via do_recreate_containers. No data is lost.
function recreate_containers() {
    readonly logfile="/home/yellowtent/platformdata/logs/box.log" # read by do_recreate_containers

    echo "This will re-create all the containers. Apps will go down for a while. No data will be lost."
    read -p "Do you want to proceed? [y/N] " choice
    choice=${choice:-n}

    [[ ! $choice =~ ^[Yy]$ ]] && exit 1

    do_recreate_containers
}
|
|
|
|
# Pull all addon images listed in the box code's infra_version.js. Each
# image is pulled twice: once by sha256 digest and once by tag, so the tag
# shows up readably in 'docker images' output.
function download_docker_images() {
    info "Downloading addon images"
    images=$(node -e "const i = require('/home/yellowtent/box/src/infra_version.js'); console.log(Object.keys(i.images).map(x => i.images[x]).join(' '));")

    for image in ${images}; do
        info "Pulling ${image}"
        docker pull -q "${image}" &>/dev/null # this pulls the image using the sha256
        docker pull -q "${image%@sha256:*}" &>/dev/null # this will tag the image for readability
    done
}
|
|
|
|
# Ask the operator for permission to reboot. Never returns: exits with
# status 1 if declined, otherwise reboots the machine.
function ask_reboot() {
    read -p "Do you want to reboot ? [y/N] " choice
    if [[ ! ${choice:-n} =~ ^[Yy]$ ]]; then
        exit 1
    fi

    reboot
    exit
}
|
|
|
|
# Rebuild docker storage from scratch without losing app data. Runs as a
# resumable multi-stage operation (progress tracked in a stage file) since
# it may need up to two reboots: stop services -> clear storage ->
# re-download images -> re-create containers.
function recreate_docker() {
    readonly logfile="/home/yellowtent/platformdata/logs/box.log"
    readonly stagefile="/home/yellowtent/platformdata/recreate-docker-stage"
    readonly containerd_root="/var/lib/containerd"

    if ! docker_root=$(docker info -f '{{ .DockerRootDir }}' 2>/dev/null); then
        warn "Unable to detect docker root. Assuming /var/lib/docker" # bugfix: was 'warning', an undefined function
    fi
    [[ -z "${docker_root}" ]] && docker_root="/var/lib/docker"

    # stage 0: first run - confirm, stop services, initialize stage file
    if [[ ! -e "${stagefile}" ]]; then
        echo "Use this command when docker storage (at $docker_root) is corrupt. It will delete"
        echo "the docker storage, re-download docker images and re-create containers. Dashboard and apps"
        echo -e "will be unreachable for a while. No data will be lost.\n"
        echo -e "The server may have to be rebooted twice for this. If so, re-run this command after every reboot.\n"
        read -p "Do you want to proceed? [y/N] " choice
        choice=${choice:-n}

        [[ ! $choice =~ ^[Yy]$ ]] && exit 1

        info "Stopping box and docker"
        systemctl stop -q box docker containerd docker.socket || true
        systemctl disable -q box docker containerd docker.socket || true

        echo -e "clearing_storage" > "${stagefile}" # init
    fi

    # stage 1: wipe docker and containerd storage (may need a reboot if busy)
    if grep -q "clearing_storage" "${stagefile}"; then
        info "Clearing docker storage at ${docker_root}"
        if ! rm -rf "${docker_root}/"* "${containerd_root}/"*; then
            echo -e "\nThe server has to be rebooted to clear the docker storage. After reboot,"
            echo -e "run 'cloudron-support --recreate-docker' again.\n"
            ask_reboot
        fi
        echo -e "cleared_storage" > "${stagefile}"
    fi

    # stage 2: restart docker, pull all addon images, then force a reboot
    if grep -q "cleared_storage" "${stagefile}"; then
        info "Starting docker afresh at ${docker_root}"
        systemctl enable --now -q docker.socket docker containerd
        sleep 5 # give docker some time to initialize the storage directory
        download_docker_images
        echo -e "downloaded_images" > "${stagefile}"
        echo -e "\nThe server has to be rebooted for docker to initialize properly. After reboot,"
        echo -e "run 'cloudron-support --recreate-docker' again.\n" # else docker network is not completely functional
        ask_reboot
    fi

    # stage 3: after the final reboot, re-enable box and re-create containers
    if grep -q "downloaded_images" "${stagefile}"; then
        systemctl enable -q box
        do_recreate_containers
    fi

    rm "${stagefile}"
}
|
|
|
|
# Download and (re)install the docker/containerd versions pinned in the
# Cloudron installer script, allowing downgrades.
function fix_docker_version() {
    ubuntu_codename=$(lsb_release -cs)
    ubuntu_version=$(lsb_release -rs)

    # pinned versions come from the installer script
    docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    containerd_version="$(sed -ne 's/readonly containerd_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"

    echo "downloading docker ${docker_version}"
    # copied from installer.sh
    curl --fail -sL "https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64/containerd.io_${containerd_version}-1_amd64.deb" -o /tmp/containerd.deb
    curl --fail -sL "https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64/docker-ce-cli_${docker_version}-1~ubuntu.${ubuntu_version}~${ubuntu_codename}_amd64.deb" -o /tmp/docker-ce-cli.deb
    curl --fail -sL "https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64/docker-ce_${docker_version}-1~ubuntu.${ubuntu_version}~${ubuntu_codename}_amd64.deb" -o /tmp/docker.deb

    echo "installing docker"
    apt install -y --allow-downgrades /tmp/containerd.deb /tmp/docker-ce-cli.deb /tmp/docker.deb
    rm /tmp/containerd.deb /tmp/docker-ce-cli.deb /tmp/docker.deb
}
|
|
|
|
function apply_patch() {
    # Downloads a commit patch from the cloudron git instance, asks for
    # confirmation, dry-runs it and then applies it to the box code.
    # Arguments: $1 - the git commit id to apply
    local commit_id="$1"
    local patch_file="/tmp/${commit_id}.patch"
    local headers_file

    # mktemp instead of a fixed /tmp/headers path: this script runs as root and
    # predictable temp names in /tmp are a symlink-attack hazard
    headers_file=$(mktemp)

    # gitlab will return 404 if it looks like a valid commit id but doesn't exist. it returns login page with invalid commit id
    if ! curl -s "https://git.cloudron.io/cloudron/box/-/commit/${commit_id}.patch" -D "${headers_file}" -o "${patch_file}"; then
        echo "Could not connect to git"
        exit 1
    fi

    if ! grep -q "content-type: text/plain" "${headers_file}"; then
        echo "Not a valid commit"
        exit 1
    fi

    echo "This will apply ${commit_id} (${patch_file}) from git and restart the box code."
    warn "Do not proceed unless you know what you are doing."
    # -r so backslashes in the reply are not interpreted
    read -r -p "Do you want to apply the patch? [y/N] " choice
    choice=${choice:-n}

    [[ ! $choice =~ ^[Yy]$ ]] && exit 1

    # dry-run first so a partially-applying patch never leaves the tree mangled
    if ! patch --force --dry-run -d /home/yellowtent/box -p1 -i "${patch_file}"; then
        echo "Patch does not apply cleanly"
        exit 1
    fi

    patch -d /home/yellowtent/box -p1 -i "${patch_file}"
    systemctl restart box

    echo "Patch applied"
}
|
|
|
|
function check_db_migrations() {
    # Compares the newest migration recorded in the box database against the
    # newest migration file shipped with the box code.
    # NOTE(review): the DB value appears to be stored with a leading slash and
    # without the .js suffix, hence the ".js" append and the "/" prepend below
    # to normalize both sides — confirm against the migrations table.
    local -r db_migration="$(mysql -NB -uroot -ppassword -e "SELECT name FROM box.migrations ORDER BY run_on DESC LIMIT 1" 2>/dev/null).js"
    local -r file_migration="/$(ls --ignore schema.sql --ignore initial-schema.sql /home/yellowtent/box/migrations/ | sort | tail -1)"

    if [[ "${db_migration}" == "${file_migration}" ]]; then
        success "No pending database migrations"
    else
        fail "Database migrations are pending. Last migration in DB: ${db_migration}. Last migration file: ${file_migration}."
        info "Please run 'cloudron-support --apply-db-migrations' to apply the migrations."
    fi
}
|
|
|
|
function apply_db_migrations() {
    # Runs the box setup start script, which applies any pending DB migrations
    # as part of its startup tasks.
    echo "Applying pending database migrations"
    # explicit if/else instead of 'cmd && success || fail': with the short-circuit
    # form, 'fail' would also run if 'success' itself returned non-zero
    if bash /home/yellowtent/box/setup/start.sh; then
        success "Database migrations applied successfully"
    else
        fail "Database migrations failed"
    fi
}
|
|
|
|
function check_services() {
    # Checks each platform service container: running state, TCP reachability
    # and the HTTP healthcheck endpoint.
    # parallel arrays: services[i] listens on service_ip[i]:service_port[i]
    local services=("mysql" "postgresql" "mongodb" "mail" "graphite")
    local service_ip=("172.18.30.1" "172.18.30.2" "172.18.30.3" "172.18.30.4" "172.18.30.5")
    local service_port=("3000" "3000" "3000" "3000" "2003")
    local index name ip port

    for index in "${!services[@]}"; do
        name="${services[$index]}"
        ip="${service_ip[$index]}"
        port="${service_port[$index]}"

        # Check if container is running. the go-template must be quoted so the
        # shell does not glob/word-split the braces; stderr is silenced for the
        # container-does-not-exist case (inspect output is empty then)
        if [[ $(docker inspect "${name}" --format '{{.State.Status}}' 2>/dev/null) != "running" ]]; then
            fail "Service '${name}' container is not running!"
            continue
        fi

        # Check if service is reachable with simple nc check
        if ! nc -z -w5 "${ip}" "${port}" 2>/dev/null; then
            fail "Service '${name}' is not reachable"
            continue
        fi

        # Curl the healthcheck endpoint
        if [[ "${name}" != "graphite" ]]; then
            if ! grep -q "true" <<< "$(curl --fail -s "http://${ip}:${port}/healthcheck")"; then
                fail "Service '${name}' healthcheck failed"
                continue
            fi
        else
            # Graphite has a different healthcheck endpoint and needs to be checked differently
            if ! grep -q "Graphite Dashboard" <<< "$(curl --fail -s "http://${ip}:8000/graphite-web/dashboard")"; then
                fail "Service '${name}' healthcheck failed"
                continue
            fi
        fi

        success "Service '${name}' is running and healthy"
    done
}
|
|
|
|
check_disk_space

# long options are kept alphabetical. 'enable-ssh' and 'admin-login' are legacy
# aliases; both must be listed here, otherwise getopt rejects them before their
# fall-through case arms below can ever run
args=$(getopt -o "" -l "admin-login,apply-db-migrations,check-db-migrations,check-services,disable-dnssec,disable-remote-support,enable-remote-support,enable-ssh,fix-docker-version,help,owner-login,patch:,recreate-containers,recreate-docker,send-diagnostics,troubleshoot,unbound-use-external-dns" -n "$0" -- "$@")
eval set -- "${args}"

while true; do
    case "$1" in
        --enable-ssh)
            # legacy alias, fall through
            ;&
        --enable-remote-support) enable_remote_support; exit 0;;
        --disable-remote-support) disable_remote_support; exit 0;;
        --admin-login)
            # legacy alias, fall through
            ;&
        --owner-login) owner_login; exit 0;;
        --send-diagnostics) send_diagnostics; exit 0;;
        --troubleshoot) troubleshoot; exit 0;;
        --disable-dnssec) disable_dnssec; exit 0;;
        --unbound-use-external-dns) unbound_use_external_dns; exit 0;;
        --recreate-containers) recreate_containers; exit 0;;
        --recreate-docker) recreate_docker; exit 0;;
        --fix-docker-version) fix_docker_version; exit 0;;
        --check-db-migrations) check_db_migrations; exit 0;;
        --apply-db-migrations) apply_db_migrations; exit 0;;
        --check-services) check_services; exit 0;;
        --patch) apply_patch "$2"; exit 0;;
        --help) break;; # falls through to the help message below
        --) break;;     # no option given; show help
        *) echo "Unknown option $1"; exit 1;;
    esac
done

echo -e "${HELP_MESSAGE}"