972 lines
38 KiB
Bash
Executable File
972 lines
38 KiB
Bash
Executable File
#!/bin/bash

set -eu -o pipefail

# this script requires root privileges (run via sudo)
if (( EUID != 0 )); then
    echo "This script should be run as root. Run with sudo"
    exit 1
fi
|
|
|
|
# ANSI escape sequences for colored terminal output
readonly RED='\033[31m'
readonly GREEN='\033[32m'
readonly YELLOW='\033[33m'
readonly BOLD='\033[1m'
readonly DONE='\033[m' # reset all attributes

# pastebin service used by --send-diagnostics
readonly PASTEBIN="https://paste.cloudron.io"
# section separator used in the diagnostics log
readonly LINE="\n========================================================\n"
readonly HELP_MESSAGE="

Cloudron Support and Diagnostics Tool

See https://docs.cloudron.io/troubleshooting for more information on troubleshooting.

Options:
  --apply-db-migrations     Applies all pending DB migrations
  --check-db-migrations     Checks if the DB migrations are up to date
  --check-services          Checks if services/addons are running and healthy.
  --disable-dnssec          Disable DNSSEC
  --disable-ipv6            Disable IPv6. Use --reenable-ipv6 to re-enable.
  --enable-remote-support   Enable SSH Remote Access for the Cloudron support team
  --disable-remote-support  Disable SSH Remote Access for the Cloudron support team
  --fix-docker-version      Ensures the correct docker version is installed
  --owner-login             Login as owner
  --recreate-containers     Deletes all existing containers and recreates them without loss of data
  --recreate-docker         Deletes docker storage (containers and images) and recreates it without loss of data
  --send-diagnostics        Collects server diagnostics and uploads it to ${PASTEBIN}
  --troubleshoot            Dashboard down? Run tests to identify the potential problem
  --unbound-forward-dns     Unbound is the internal DNS server used for recursive DNS queries. This is only needed
                            if your network does not allow outbound DNS requests.
                            Options are 'google', 'cloudflare' or comma separated custom server (e.g 8.8.8.8,1.1.1.1).
  --help                    Show this message
"
|
|
|
|
# Prints a green [OK] status tag followed by a tab-separated message.
function success() {
    local -r message="$1"
    echo -e "[${GREEN}OK${DONE}]\t${message}"
}
|
|
|
|
# Prints a tab-indented informational message (no status tag).
function info() {
    local -r message="$1"
    echo -e "\t${message}"
}
|
|
|
|
# Prints a yellow [WARN] status tag followed by a tab-separated message.
function warn() {
    local -r message="$1"
    echo -e "[${YELLOW}WARN${DONE}]\t${message}"
}
|
|
|
|
# Prints a red [FAIL] status tag followed by a tab-separated message,
# written to stderr.
function fail() {
    local -r message="$1"
    echo -e "[${RED}FAIL${DONE}]\t${message}" >&2
}
|
|
|
|
# Grants the Cloudron support team SSH access by appending their public key
# to the cloudron-support user's authorized_keys, after the operator
# interactively accepts the access disclaimer. Idempotent; exits 1 if the
# terms are declined.
function enable_remote_support() {
    local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r ssh_user="cloudron-support"
    local -r keys_file="/home/cloudron-support/.ssh/authorized_keys"

    echo -e "
================= ${BOLD}SSH ACCESS DISCLAIMER${DONE} =================

By granting us SSH or remote access to your systems, you acknowledge and
agree to the following:

1. ${BOLD}Access to Customer Data${DONE}
Our team may have visibility into customer data during the course of
investigating or resolving issues. While we take all reasonable steps to
respect your privacy and handle data securely, you acknowledge that such
access may occur as part of the support process.

2. ${BOLD}No Liability for Data Loss or System Changes${DONE}
Although we strive to exercise caution and due diligence, you acknowledge
and accept that:
${BOLD}-${DONE} There is an inherent risk of data loss, corruption, or system
disruption during troubleshooting or configuration changes.
${BOLD}-${DONE} We shall not be held liable for any loss of data, service
downtime, or unintended consequences arising from our access or any
actions taken during the support process.

3. ${BOLD}Backups and Safeguards${DONE}
You are solely responsible for ensuring that up-to-date and complete
backups of your systems and data exist prior to granting us access.

4. ${BOLD}Local Changes and Auto-Updates${DONE}
Your system may receive automatic updates as part of regular maintenance or
feature releases. Any local modifications or patches applied during support
may be overwritten by future updates. ${BOLD}-${DONE} It is the customer's
responsibility to track such changes and reapply them if necessary, or to
coordinate with us for permanent integration where applicable.

5. ${BOLD}Consent to Proceed${DONE}
By providing access, you confirm that you have read, understood, and agreed
to the terms above and expressly authorize us to proceed with accessing
your systems for support purposes.

=======================================================================
"
    read -p "Do you accept these terms? [y/N] " choice
    choice=${choice:-n} # default to 'no' on empty input

    # abort unless the user explicitly answered yes
    [[ ! $choice =~ ^[Yy]$ ]] && exit 1

    echo -n "Terms accepted. Enabling Remote Access for the Cloudron support team..."
    mkdir -p $(dirname "${keys_file}") # .ssh does not exist sometimes
    touch "${keys_file}" # required for concat to work
    # append the key only once so repeated runs stay idempotent
    if ! grep -q "${cloudron_support_public_key}" "${keys_file}"; then
        echo -e "\n${cloudron_support_public_key}" >> "${keys_file}"
        chmod 600 "${keys_file}"
        chown "${ssh_user}" "${keys_file}"
    fi

    echo "Done"
}
|
|
|
|
# Revokes the Cloudron support team's SSH access by removing their public
# key from the cloudron-support user's authorized_keys. No-op if the key
# is not present.
function disable_remote_support() {
    local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r keys_file="/home/cloudron-support/.ssh/authorized_keys"

    echo -n "Disabling Remote Access for the Cloudron support team..."
    # quote the dirname result: unquoted command substitution word-splits (SC2046)
    mkdir -p "$(dirname "${keys_file}")" # .ssh does not exist sometimes
    touch "${keys_file}" # required for del below to work
    if grep -q "${cloudron_support_public_key}" "${keys_file}"; then
        # the key contains no '/' characters, so it is safe as a sed address
        sed "/${cloudron_support_public_key}/d" -i "${keys_file}"
    fi

    echo "Done"
}
|
|
|
|
# Returns 0 once the given systemd unit has been continuously active for
# more than 10 seconds; polls up to 3 times with an 11s sleep between
# checks. Returns 1 if the unit keeps restarting (uptime never exceeds 10s).
function wait_systemd_service() {
    local -r service="$1"
    # keep scratch variables out of the global scope (the originals leaked)
    local attempt ts start now up_time

    for attempt in 1 2 3; do
        # ActiveEnterTimestamp is when the unit last entered 'active'
        ts=$(systemctl show "${service}" -p ActiveEnterTimestamp | sed 's/ActiveEnterTimestamp=//g')
        start=$(date '+%s' --date="${ts}")
        now=$(date '+%s')

        up_time=$(( now - start ))
        (( up_time > 10 )) && return 0

        info "Service '${service}' just started $up_time secs ago, checking health again in 10s"
        sleep 11
    done

    return 1
}
|
|
|
|
# Ensures the host MySQL service is running: attempts one restart if it is
# down, then verifies it is not crash-looping. Exits 1 on persistent
# failure.
function check_host_mysql() {
    if ! systemctl is-active -q mysql; then
        info "MySQL is down. Trying to restart MySQL ..."

        systemctl restart mysql

        if ! systemctl is-active -q mysql; then
            fail "MySQL is still down, please investigate the error by inspecting /var/log/mysql/error.log"
            exit 1
        fi
    fi

    # guard against crash-looping: the unit must stay up for more than 10s
    if ! wait_systemd_service mysql; then
        fail "MySQL keeps restarting, please investigate the error by inspecting /var/log/mysql/error.log"
        exit 1
    fi

    success "MySQL is running"
}
|
|
|
|
# Ensures the Cloudron 'box' service is running. If down, re-runs the
# setup/migration script and stop/starts the unit (a plain restart is
# unreliable here), then verifies it stays up. Exits 1 on failure.
function check_box() {
    # read the installed box version for log messages; tolerate a missing file
    [[ -f /home/yellowtent/box/VERSION ]] && version=$(cat /home/yellowtent/box/VERSION) || version='<unknown>'

    if ! systemctl is-active -q box; then
        info "box v${version} is down. re-running migration script and restarting it ..."

        /home/yellowtent/box/setup/start.sh
        systemctl stop box # a restart sometimes doesn't restart, no idea
        systemctl start box

        if ! systemctl is-active -q box; then
            fail "box service is still down, please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log"
            exit 1
        fi
    fi

    # guard against crash-looping: the unit must stay up for more than 10s
    if ! wait_systemd_service box; then
        fail "box service keeps restarting, please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log"
        exit 1
    fi

    success "box v${version} is running"
}
|
|
|
|
# Sanity-checks netplan: 'netplan get all' must succeed. An empty
# configuration only warns, since some setups manage networking elsewhere.
function check_netplan() {
    local output
    if ! output=$(netplan get all 2>/dev/null); then
        fail "netplan is not working"
        exit 1
    fi

    if [[ -n "${output}" ]]; then
        success "netplan is good"
    else
        warn "netplan configuration is empty. this might be OK depending on your networking setup"
    fi
}
|
|
|
|
# Prints one-time "ghost" login credentials for the Cloudron owner: writes
# a freshly generated password into box.settings ('ghosts_config'), which
# the box accepts once for the owner account.
function owner_login() {
    # make sure the DB is up before querying it; discard the status output
    check_host_mysql >/dev/null

    # oldest active user holding the 'owner' role
    local -r owner_username=$(mysql -NB -uroot -ppassword -e "SELECT username FROM box.users WHERE role='owner' AND username IS NOT NULL AND active=1 ORDER BY creationTime LIMIT 1" 2>/dev/null)
    local -r owner_password=$(pwgen -1s 12) # random 12-char secure password
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    # upsert the ghost credentials mapping username -> one-time password
    mysql -NB -uroot -ppassword -e "INSERT INTO box.settings (name, value) VALUES ('ghosts_config', '{\"${owner_username}\":\"${owner_password}\"}') ON DUPLICATE KEY UPDATE name='ghosts_config', value='{\"${owner_username}\":\"${owner_password}\"}'" 2>/dev/null
    echo "Login at https://my.${dashboard_domain} as ${owner_username} / ${owner_password} . This password may only be used once."
}
|
|
|
|
# Collects server diagnostics (OS, Cloudron, docker, disk, service status
# and logs) into /tmp/cloudron-support.log and uploads it to the Cloudron
# pastebin, printing a shareable link to send to support.
function send_diagnostics() {
    local -r log="/tmp/cloudron-support.log"

    echo -n "Generating Cloudron Support stats..."

    rm -rf $log

    echo -e $LINE"Linux"$LINE >> $log
    uname -nar &>> $log

    echo -e $LINE"Ubuntu"$LINE >> $log
    lsb_release -a &>> $log

    echo -e $LINE"Cloudron"$LINE >> $log
    cloudron_version=$(cat /home/yellowtent/box/VERSION || true)
    echo -e "Cloudron version: ${cloudron_version}" >> $log
    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null || true)
    echo -e "Dashboard domain: ${dashboard_domain}" >> $log

    # timeouts guard against a hung docker daemon blocking the whole report
    echo -e $LINE"Docker"$LINE >> $log
    if ! timeout --kill-after 10s 15s docker system info &>> $log 2>&1; then
        echo -e "Docker (system info) is not responding" >> $log
    fi

    if ! timeout --kill-after 10s 15s docker ps -a &>> $log 2>&1; then
        echo -e "Docker (ps) is not responding" >> $log
    fi

    echo -e $LINE"Filesystem stats"$LINE >> $log
    if ! timeout --kill-after 10s 15s df -h &>> $log 2>&1; then
        echo -e "df is not responding" >> $log
    fi

    echo -e $LINE"Appsdata stats"$LINE >> $log
    du -hcsL /home/yellowtent/appsdata/* &>> $log || true

    echo -e $LINE"Boxdata stats"$LINE >> $log
    du -hcsL /home/yellowtent/boxdata/* &>> $log

    echo -e $LINE"Backup stats (possibly misleading)"$LINE >> $log
    du -hcsL /var/backups/* &>> $log || true

    echo -e $LINE"System daemon status"$LINE >> $log
    systemctl status --lines=100 --no-pager --full box mysql unbound cloudron-syslog nginx docker &>> $log

    echo -e $LINE"Box logs"$LINE >> $log
    tail -n 100 /home/yellowtent/platformdata/logs/box.log &>> $log

    echo -e $LINE"Interface Info"$LINE >> $log
    ip addr &>> $log

    echo -e $LINE"Firewall chains"$LINE >> $log
    iptables -L &>> $log
    # presence of /proc/net/if_inet6 indicates kernel IPv6 support
    has_ipv6=$(cat /proc/net/if_inet6 >/dev/null 2>&1 && echo "yes" || echo "no")
    echo -e "IPv6: ${has_ipv6}" >> $log
    [[ "${has_ipv6}" == "yes" ]] && ip6tables -L &>> $log

    echo "Done"

    echo -n "Uploading information..."
    # hastebin-style API: POST the file, the JSON response contains the paste key
    paste_key=$(curl -X POST ${PASTEBIN}/documents --silent --data-binary "@$log" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])")
    echo "Done"

    echo -e "\nPlease email the following link to support@cloudron.io : ${PASTEBIN}/${paste_key}"
}
|
|
|
|
# Checks that host DNS resolution works (expected via systemd-resolved).
# On failure, prints warnings about common /etc/resolv.conf
# misconfigurations before exiting 1.
function check_dns() {
    if host cloudron.io &>/dev/null; then
        success "DNS is resolving via systemd-resolved"
        return
    fi

    # resolution is broken from here on; emit diagnostics before failing
    if ! systemctl is-active -q systemd-resolved; then
        warn "systemd-resolved is not in use. see 'systemctl status systemd-resolved'"
    fi

    if [[ -L /etc/resolv.conf ]]; then
        target=$(readlink /etc/resolv.conf)
        if [[ "$target" != *"/run/systemd/resolve/stub-resolv.conf" ]]; then
            warn "/etc/resolv.conf is symlinked to $target instead of '../run/systemd/resolve/stub-resolv.conf'"
        fi
    else
        warn "/etc/resolv.conf is not symlinked to '../run/systemd/resolve/stub-resolv.conf'"
    fi

    # 127.0.0.53 is the systemd-resolved stub listener
    if ! grep -q "^nameserver 127.0.0.53" /etc/resolv.conf; then
        warn "/etc/resolv.conf is not using systemd-resolved. it is missing the line 'nameserver 127.0.0.53'"
    fi

    fail "DNS is not resolving"
    # re-run without suppression so the actual error output is visible
    host cloudron.io || true
    exit 1
}
|
|
|
|
# Ensures the unbound DNS server (listening on 127.0.0.150) is running and
# resolving. Distinguishes blocked outbound DNS from other unbound
# failures. Exits 1 on failure.
function check_unbound() {
    if ! systemctl is-active -q unbound; then
        info "unbound is down. restarting to see if it fixes it" # unbound-anchor is part of ExecStartPre
        systemctl restart unbound

        if ! systemctl is-active -q unbound; then
            fail "unbound is still down, please investigate the error using 'journalctl -u unbound'"
            exit 1
        fi
    fi

    # guard against crash-looping: the unit must stay up for more than 10s
    if ! wait_systemd_service unbound; then
        fail "unbound service keeps restarting, please investigate the error using 'journalctl -u unbound'"
        exit 1
    fi

    if ! host cloudron.io 127.0.0.150 &>/dev/null; then
        # query a root server directly to tell "outbound DNS blocked" apart
        # from "unbound itself broken"
        if ! host -t NS . 198.41.0.4 &>/dev/null; then # the IP is DNS A root server IP
            fail "Unbound is not resolving. Outbound DNS requests are blocked. Use 'cloudron-support --unbound-forward-dns <dns>' to forward DNS requests."
        else
            fail "Unbound is not resolving. However, Outbound DNS requests are not blocked. Investigate output of 'journactl -u unbound'"
        fi

        # re-run without suppression so the error output is visible
        host cloudron.io 127.0.0.150
        exit 1
    fi

    success "unbound is running"
}
|
|
|
|
# Validates the dashboard TLS certificate referenced by the nginx vhost
# config. On expiry, points the user at the latest 'checkCerts' task log.
# Exits 1 if the cert has (nearly) expired.
function check_dashboard_cert() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r nginx_conf_file="/home/yellowtent/platformdata/nginx/applications/dashboard/my.${dashboard_domain}.conf"
    # extract the ssl_certificate path from the nginx vhost config
    local -r cert_file=$(sed -n -e 's/.*ssl_certificate [[:space:]]\+\(.*\);/\1/p' "${nginx_conf_file}")

    local -r cert_expiry_date=$(openssl x509 -enddate -noout -in "${cert_file}" | sed -e 's/notAfter=//')

    # -checkend 100: fail if the cert expires within the next 100 seconds
    if ! openssl x509 -checkend 100 -noout -in "${cert_file}" >/dev/null 2>&1; then
        fail "Certificate has expired. Certificate expired at ${cert_expiry_date}"

        # most recent cert-renewal task holds the relevant renewal logs
        local -r task_id=$(mysql -NB -uroot -ppassword -e "SELECT id FROM box.tasks WHERE type='checkCerts' ORDER BY id DESC LIMIT 1" 2>/dev/null)
        echo -e "\tPlease check /home/yellowtent/platformdata/logs/tasks/${task_id}.log for last cert renewal logs"
        echo -e "\tCommon issues include expiry of domain's API key OR incoming http port 80 not being open"
        exit 1
    fi

    success "dashboard cert is valid"
}
|
|
|
|
# Ensures nginx is running and reloadable. If a reload fails, removes stale
# dashboard vhost configs and any config that references a missing cert or
# key file, then restarts nginx. Exits 1 if nginx still will not start.
function check_nginx() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)

    # it is possible nginx is running but can't be restarted
    if ! systemctl reload -q nginx; then
        fail "nginx is down. Removing extraneous dashboard domain configs ..."

        # we had a bug where old dashboard domain config file was kept around
        cd /home/yellowtent/platformdata/nginx/applications/dashboard/ && find . ! -name "my.${dashboard_domain}.conf" -type f -exec rm -f {} +

        # check if certificates are there. nginx will still start if certs are expired
        # IFS= makes sure it doesn't trim leading and trailing whitespace
        # -r prevents interpretation of \ escapes.
        find /home/yellowtent/platformdata/nginx -type f -name '*.conf' -print0 | while IFS= read -r -d '' conf; do
            # pull the cert/key paths out of each vhost config
            cert_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate[[:blank:]]\+\(.*\);/\1/p' "${conf}")
            key_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate_key[[:blank:]]\+\(.*\);/\1/p' "${conf}")

            # a config pointing at a missing cert/key prevents nginx startup
            if [[ -n "${cert_file}" && ! -f "${cert_file}" ]]; then
                info "${cert_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi

            if [[ -n "${key_file}" && ! -f "${key_file}" ]]; then
                info "${key_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi
        done

        systemctl restart nginx

        if ! systemctl is-active -q nginx; then
            fail "nginx is still down, please investigate the error by inspecting 'journalctl -u nginx' and /var/log/nginx/error.log"
            exit 1
        fi
    fi

    # guard against crash-looping: the unit must stay up for more than 10s
    if ! wait_systemd_service nginx; then
        fail "nginx service keeps restarting, please investigate the error using 'journalctl -u nginx' and /var/log/nginx/error.log"
        exit 1
    fi

    success "nginx is running"
}
|
|
|
|
# this confirms that https works properly without any proxy (cloudflare) involved
|
|
# Verifies the dashboard site responds over HTTPS directly against
# 127.0.0.1 (bypassing external DNS and any proxy such as Cloudflare).
# Exits 1 on failure.
function check_dashboard_site_loopback() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r vhost="my.${dashboard_domain}"

    # --resolve pins the vhost name to loopback so TLS SNI still matches
    if curl --fail -s --resolve "${vhost}:443:127.0.0.1" "https://${vhost}" >/dev/null; then
        success "dashboard is reachable via loopback"
        return
    fi

    fail "Could not load dashboard website with loopback check"
    exit 1
}
|
|
|
|
# Verifies the installed node binary matches the version pinned in the box
# installer script. NOTE(review): this function is redefined later in this
# file; bash uses the later definition. Kept behaviorally in sync with it:
# without the 'command -v' guard, a missing node binary aborted the whole
# script (command-not-found inside an assignment under 'set -e') instead of
# printing a helpful message.
function check_node() {
    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    if command -v node &> /dev/null; then
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix
    else
        current_node_version="<not found>"
    fi

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        echo "    apt remove -y nodejs"
        echo "    systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}
|
|
|
|
# Prints per-interface commands to disable IPv6 instead of disabling it
# globally. Virtual/bridge/tunnel/wireless interfaces are filtered out by
# the name prefix blacklist.
function print_ipv6_disable_howto() {
    local iface ifaces
    # compute the interface list once instead of parsing 'ls' twice;
    # '|| true' because grep exits non-zero when nothing matches (set -e)
    ifaces=$(ls /sys/class/net | grep -vE '^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)' || true)

    echo "Instead of disabling IPv6 globally, you can disable it at an interface level."
    for iface in ${ifaces}; do
        echo -e "\tsysctl -w net.ipv6.conf.${iface}.disable_ipv6=1"
    done

    echo "For the above configuration to persist across reboots, you have to add below to /etc/sysctl.conf"
    for iface in ${ifaces}; do
        echo -e "\tnet.ipv6.conf.${iface}.disable_ipv6=1"
    done
}
|
|
|
|
# Checks IPv6 health: kernel IPv6 must be enabled; if any non-virtual
# interface carries a global IPv6 address, api.cloudron.io must be
# reachable via IPv6 (a half-configured IPv6 breaks outbound traffic).
# Exits 1 on failure.
function check_ipv6() {
    ipv6_disable=$(cat /sys/module/ipv6/parameters/disable)
    if [[ "${ipv6_disable}" == "1" ]]; then
        fail "IPv6 is disabled in kernel. Cloudron requires IPv6 in kernel"
        print_ipv6_disable_howto
        exit 1
    fi

    # check if server has IPv6 address
    has_ipv6_address=0
    for iface in $(ls /sys/class/net | grep -vE '^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)'); do
        # fe80::/10 link-local addresses do not count as public connectivity
        if ipv6=$(ip -6 addr show dev ${iface} | grep -o 'inet6 [^ ]*' | awk '{print $2}' | grep -v '^fe80'); then
            [[ -n "${ipv6}" ]] && has_ipv6_address=1
        fi
    done

    if [[ "${has_ipv6_address}" == "0" ]]; then
        success "IPv6 is enabled in kernel. No public IPv6 address"
        return
    fi

    if ! ping6 -q -c 1 api.cloudron.io >/dev/null 2>&1; then
        fail "Server has an IPv6 address but api.cloudron.io is unreachable via IPv6 (ping6 -q -c 1 api.cloudron.io)"
        print_ipv6_disable_howto
        exit 1
    fi

    success "IPv6 is enabled and public IPv6 address is working"
}
|
|
|
|
# Ensures the docker daemon is running: attempts one restart if it is
# down, then verifies it is not crash-looping. Exits 1 on persistent
# failure.
function check_docker() {
    if ! systemctl is-active -q docker; then
        info "Docker is down. Trying to restart docker ..."
        systemctl restart docker

        if ! systemctl is-active -q docker; then
            fail "Docker is still down, please investigate the error using 'journalctl -u docker'"
            exit 1
        fi
    fi

    # guard against crash-looping: the unit must stay up for more than 10s
    if ! wait_systemd_service docker; then
        fail "Docker keeps restarting, please investigate the error using 'journalctl -u docker'"
        exit 1
    fi

    success "docker is running"
}
|
|
|
|
# Verifies the installed docker CLI matches the version pinned in the box
# installer script; directs the user to --fix-docker-version on mismatch.
function check_docker_version() {
    expected_docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    if command -v docker &> /dev/null; then
        # quote the Go template (shellcheck-safe); '|| true' keeps set -e
        # from aborting the whole script if the daemon is unreachable
        current_docker_version="$(docker version --format '{{.Client.Version}}' 2>/dev/null || true)"
        [[ -z "${current_docker_version}" ]] && current_docker_version="<not found>"
    else
        current_docker_version="<not found>"
    fi

    if [[ "${current_docker_version}" != "${expected_docker_version}" ]]; then
        fail "docker version is incorrect. Expecting ${expected_docker_version}. Got ${current_docker_version}."
        echo "Run cloudron-support --fix-docker-version"
        exit 1
    fi

    success "docker version is correct"
}
|
|
|
|
# Verifies the installed node binary matches the version pinned in the box
# installer script. NOTE(review): this redefines an earlier check_node in
# this file; bash uses this (later) definition.
function check_node() {
    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    if command -v node &> /dev/null; then
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix
    else
        current_node_version="<not found>"
    fi

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        echo "    systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}
|
|
|
|
# Loads the dashboard over its public domain name (through whatever DNS or
# proxying is in front) and checks the response really is the Cloudron
# dashboard. Exits 1 with provider-specific hints on failure.
function check_dashboard_site_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r domain_provider=$(mysql -NB -uroot -ppassword -e "SELECT provider FROM box.domains WHERE domain='${dashboard_domain}'" 2>/dev/null)

    # TODO: check ipv4 and ipv6 separately
    if ! output=$(curl --fail --connect-timeout 10 --max-time 20 -s "https://my.${dashboard_domain}"); then
        fail "Could not load dashboard domain."
        if [[ "${domain_provider}" == "cloudflare" ]]; then
            echo "Maybe cloudflare proxying is not working. Delete the domain in Cloudflare dashboard and re-add it. This sometimes re-establishes the proxying"
        else
            echo "Hairpin NAT is not working. Please check if your router supports it"
        fi
        exit 1
    fi

    # keep $output quoted: the unquoted 'echo $output' word-split and
    # glob-expanded the fetched HTML (e.g. a literal '*' in the page)
    if ! grep -q "Cloudron Dashboard" <<<"${output}"; then
        fail "https://my.${dashboard_domain} is not the dashboard domain. Check if DNS is set properly to this server"
        host "my.${dashboard_domain}" 127.0.0.53 # could also result in cloudflare
        exit 1
    fi

    success "Dashboard is reachable via domain name"
}
|
|
|
|
# Best-effort expiry check of the dashboard domain via whois. Skips with a
# note when whois is missing or returns no expiry information; exits 1 if
# the domain appears to have expired.
function check_expired_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)

    if ! command -v whois &> /dev/null; then
        info "Domain ${dashboard_domain} expiry check skipped because whois is not installed. Run 'apt install whois' to check"
        return
    fi

    # registrars label the expiry field differently; 'egrep' is deprecated,
    # so use the equivalent 'grep -Ei'
    local -r expdate=$(whois "${dashboard_domain}" | grep -Ei 'Expiration Date:|Expires on|Expiry Date:' | head -1 | awk '{print $NF}')
    if [[ -z "${expdate}" ]]; then
        warn "Domain ${dashboard_domain} expiry check skipped because whois does not have this information"
        return
    fi

    local -r expdate_secs=$(date -d"$expdate" +%s)
    local -r curdate_secs="$(date +%s)"

    if (( curdate_secs > expdate_secs )); then
        fail "Domain ${dashboard_domain} appears to be expired"
        exit 1
    fi

    success "Domain ${dashboard_domain} is valid and has not expired"
}
|
|
|
|
# Configures unbound to forward all DNS queries to upstream resolvers, for
# networks that block direct outbound/recursive DNS. $1 is 'google',
# 'cloudflare', or a comma separated list of server IPs.
function unbound_forward_dns() {
    local -r conf_file="/etc/unbound/unbound.conf.d/forward-everything.conf"
    local ns_list forward_addrs

    info "To remove the forwarding, please delete $conf_file and 'systemctl restart unbound'"

    case "$1" in
    # bugfix: secondary Google resolver is 8.8.4.4; 4.4.4.4 (previous
    # value) is not a Google Public DNS address
    google) ns_list="8.8.8.8,8.8.4.4";; # https://developers.google.com/speed/public-dns
    cloudflare) ns_list="1.1.1.1,1.0.0.1";; # https://developers.cloudflare.com/1.1.1.1/ip-addresses/
    *) ns_list="$1";;
    esac

    # one 'forward-addr:' line per upstream server
    forward_addrs=$(echo "$ns_list" | tr ',' '\n' | sed 's/^/ forward-addr: /')

    cat > "$conf_file" <<EOF
forward-zone:
name: "."
${forward_addrs}
EOF

    systemctl restart unbound

    success "Forwarded all DNS requests to ${ns_list}"
}
|
|
|
|
# Disables DNSSEC validation in unbound by dropping a config snippet and
# restarting the service. Used by --disable-dnssec.
function disable_dnssec() {
    local -r conf_file="/etc/unbound/unbound.conf.d/disable-dnssec.conf"

    warn "To reenable DNSSEC, please delete $conf_file and 'systemctl restart unbound'"

    # val-permissive-mode logs validation failures instead of rejecting them
    cat > $conf_file <<EOF
server:
val-permissive-mode: yes
EOF

    systemctl restart unbound

    success "DNSSEC Disabled"
}
|
|
|
|
# Prints a hardware/OS summary: vendor/product, kernel, Ubuntu release,
# virtualization environment, CPU, RAM and root-disk stats.
function print_system() {
    vendor=$(cat /sys/devices/virtual/dmi/id/sys_vendor)
    product=$(cat /sys/devices/virtual/dmi/id/product_name)
    echo "Vendor: ${vendor} Product: ${product}"
    ubuntu_codename=$(lsb_release -cs)
    ubuntu_version=$(lsb_release -rs)
    linux_version=$(uname -r)
    echo "Linux: ${linux_version}"
    echo "Ubuntu: ${ubuntu_codename} ${ubuntu_version}"
    proc_count=$(grep -c ^processor /proc/cpuinfo)
    proc_name=$(lscpu | grep "Model name:" | sed -r 's/Model name:\s{1,}//g')
    # systemd-detect-virt prints "none" yet exits non-zero when no
    # virtualization is detected, so the old '|| echo none' produced the
    # output "none" twice. Capture the output and fall back explicitly.
    env_type=$(systemd-detect-virt 2>/dev/null || true) # systemd-detect-virt --list gives all the possible options
    [[ -z "${env_type}" ]] && env_type="none"
    echo "Execution environment: ${env_type}"
    echo "Processor: ${proc_name} x ${proc_count}"
    ram_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    echo "RAM: ${ram_kb}KB"
    disk_size=$(LC_ALL=C df -h --output=source,avail / | tail -n1)
    echo "Disk: ${disk_size}"
}
|
|
|
|
# Runs the full diagnostic suite. Pre-setup Cloudrons (no dashboard domain
# in the DB yet) only get the host-level checks. Ordering matters: mysql
# and dns must be healthy before the dashboard checks can run.
function troubleshoot() {
    # note: disk space test has already been run globally
    print_system
    check_node
    check_ipv6
    check_docker
    check_docker_version
    check_host_mysql
    check_netplan
    check_dns
    check_unbound # this is less fatal after 8.0

    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)

    if [[ -z "${dashboard_domain}" ]]; then
        # no dashboard domain means setup has not completed yet
        [[ -f /home/yellowtent/box/VERSION ]] && version=$(cat /home/yellowtent/box/VERSION) || version='<unknown>'
        warn "Cloudron v${version} has not been set up yet. Visit https://<IP> to set up the dashboard."
    else
        check_nginx # requires mysql to be checked
        check_dashboard_cert
        check_dashboard_site_loopback # checks website via loopback
        check_db_migrations
        check_services
        check_box

        check_dashboard_site_domain # check website via domain name
        check_expired_domain
    fi
}
|
|
|
|
# Interactively reclaims disk space: truncates platform logs, prunes unused
# docker images, and clears stale temp files inside running containers.
# Each step asks for confirmation and defaults to 'no'.
function cleanup_disk_space() {
    read -p "Truncate log files to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        truncate -s0 /home/yellowtent/platformdata/logs/*/*.log
        rm -f /home/yellowtent/platformdata/logs/*.log.* # delete the log.1, log.2 etc
    fi

    read -p "Prune docker system resources to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        # bugfix: was 'docker images prune -fa', which is not a valid
        # command ('docker images' only lists images, so the prune never
        # ran). 'docker image prune -af' removes all unused images.
        docker image prune -af || true
    fi

    read -p "Prune docker volumes to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        # NOTE(review): despite the prompt wording, this clears day-old
        # files from each running container's /tmp and /run rather than
        # pruning docker volumes
        for container in $(docker ps --format "{{.ID}}"); do
            docker exec "$container" find /tmp -type f -mtime +1 -delete || true
            docker exec "$container" find /run -type f -mtime +1 -delete || true
        done
    fi
}
|
|
|
|
# Verifies minimum free space: 10MB on / (offering an interactive cleanup
# once before failing) and 5MB on /tmp for the diagnostics log file.
# Exits 1 when space cannot be freed.
function check_disk_space() {
    local avail

    # check if at least 10mb root partition space is available
    # ($(...) replaces the old backticks; 'avail' holds KiB from df)
    avail=$(df --output=avail / | sed -n 2p)
    if [[ "${avail}" -lt 10240 ]]; then
        echo "No more space left on / (see df -h output)"
        cleanup_disk_space
    fi

    # re-measure after the (optional) cleanup
    avail=$(df --output=avail / | sed -n 2p)
    if [[ "${avail}" -lt 10240 ]]; then
        echo "Still no space despite cleaning up. If you have backups (/var/backups) on this disk, delete old backups to free some space"
        exit 1
    fi

    # check for at least 5mb free /tmp space for the log file
    avail=$(df --output=avail /tmp | sed -n 2p)
    if [[ "${avail}" -lt 5120 ]]; then
        echo "Not enough space left on /tmp"
        echo "Free up some space first by deleting files from /tmp"
        exit 1
    fi
}
|
|
|
|
# Forces the box service to re-create all addon/app containers by
# downgrading the recorded INFRA_VERSION and restarting box, then waits
# until the box log reports the platform ready. Expects the caller to have
# set the global 'logfile' (see recreate_containers / recreate_docker).
function do_recreate_containers() {
    echo -e "Re-creating addon and app containers\n"
    info "Follow re-create logs in a second terminal with:"
    info "$ tail -f ${logfile}"
    echo ""

    echo -n "This takes a while ."
    # remember the current log length so only new lines are scanned below
    line_count=$(cat "${logfile}" | wc -l)
    # an artificially old infra version makes box consider every container outdated
    sed -e 's/"version": ".*",/"version":"48.0.0",/' -i /home/yellowtent/platformdata/INFRA_VERSION
    systemctl restart -q box # will re-create docker network

    # poll the box log until the platform reports readiness
    while ! tail -n "+${line_count}" "${logfile}" | grep -q "platform is ready"; do
        echo -n "."
        sleep 2
    done

    echo -e "\n\nDone! Addon containers successfully re-created. The apps in the dashboard will"
    echo -e "say 'Configuring (Queued)'. They will come up in a short while.\n"
}
|
|
|
|
# Interactive entry point for --recreate-containers: confirms with the
# operator (default 'no'), then delegates to do_recreate_containers.
# 'logfile' is deliberately a readonly global consumed by the helper.
function recreate_containers() {
    readonly logfile="/home/yellowtent/platformdata/logs/box.log"

    echo "This will re-create all the containers. Apps will go down for a while. No data will be lost."
    read -p "Do you want to proceed? [y/N] " choice
    choice=${choice:-n}

    [[ ! $choice =~ ^[Yy]$ ]] && exit 1

    do_recreate_containers
}
|
|
|
|
# Pulls every addon image listed in the box infra manifest, trying the
# IPv4-only registry endpoint first and falling back to IPv6; retries
# indefinitely with a 10s pause. Successfully pulled images are re-tagged
# to their canonical (digest-less) name.
function download_docker_images() {
    info "Downloading addon images"

    # extract the image list from the box source via node
    images=$(node --input-type=module -e "import i from '/home/yellowtent/box/src/infra_version.js'; console.log(Object.keys(i.images).map(x => i.images[x]).join(' '));")

    for image_ref in ${images}; do
        info "Pulling ${image_ref}"

        # protocol-specific registry endpoints for constrained networks
        ipv4_image_ref="${image_ref/registry.docker.com/registry.ipv4.docker.com}"
        ipv6_image_ref="${image_ref/registry.docker.com/registry.ipv6.docker.com}"

        while true; do
            # try IPv4 first
            if timeout --kill-after=10s 1200s docker pull "${ipv4_image_ref}"; then
                # re-tag to the canonical name, stripping any @sha256 digest suffix
                docker tag "${ipv4_image_ref}" "${image_ref%@sha256:*}"
                docker rmi "${ipv4_image_ref}" >/dev/null 2>&1 || true
                break
            fi

            info "Could not pull ${ipv4_image_ref}, trying IPv6"

            # fallback to IPv6
            if timeout --kill-after=10s 1200s docker pull "${ipv6_image_ref}"; then
                docker tag "${ipv6_image_ref}" "${image_ref%@sha256:*}"
                docker rmi "${ipv6_image_ref}" >/dev/null 2>&1 || true
                break
            fi

            info "Could not pull ${ipv6_image_ref} either, retrying in 10s"
            sleep 10
        done
    done
}
|
|
|
|
# Prompts for a reboot (default 'no'). Declining exits the script with
# status 1; accepting reboots the machine and exits.
function ask_reboot() {
    read -p "Do you want to reboot ? [y/N] " choice
    choice=${choice:-n}

    if [[ ! $choice =~ ^[Yy]$ ]]; then
        exit 1
    fi

    reboot
    exit
}
|
|
|
|
# Staged, reboot-tolerant rebuild of docker storage. Progress is recorded
# in a stage file so the command can be re-run after each required reboot:
#   clearing_storage -> cleared_storage -> downloaded_images -> done
function recreate_docker() {
    readonly logfile="/home/yellowtent/platformdata/logs/box.log"
    readonly stagefile="/home/yellowtent/platformdata/recreate-docker-stage"
    readonly containerd_root="/var/lib/containerd"

    if ! docker_root=$(docker info -f '{{ .DockerRootDir }}' 2>/dev/null); then
        warn "Unable to detect docker root. Assuming /var/lib/docker"
    fi
    [[ -z "${docker_root}" ]] && docker_root="/var/lib/docker"

    # first run: confirm with the operator and stop/disable the services
    if [[ ! -e "${stagefile}" ]]; then
        echo "Use this command when docker storage (at $docker_root) is corrupt. It will delete"
        echo "the docker storage, re-download docker images and re-create containers. Dashboard and apps"
        echo -e "will be unreachable for a while. No data will be lost.\n"
        echo -e "The server may have to be rebooted twice for this. If so, re-run this command after every reboot.\n"
        read -p "Do you want to proceed? [y/N] " choice
        choice=${choice:-n}

        [[ ! $choice =~ ^[Yy]$ ]] && exit 1

        info "Stopping box and docker"
        systemctl stop -q box docker containerd docker.socket || true
        systemctl disable -q box docker containerd docker.socket || true

        echo -e "clearing_storage" > "${stagefile}" # init
    fi

    if grep -q "clearing_storage" "${stagefile}"; then
        info "Clearing docker storage at ${docker_root}"
        # rm can fail on busy mounts; a reboot releases them
        if ! rm -rf "${docker_root}/"* "${containerd_root}/"*; then
            echo -e "\nThe server has to be rebooted to clear the docker storage. After reboot,"
            echo -e "run 'cloudron-support --recreate-docker' again.\n"
            ask_reboot
        fi
        echo -e "cleared_storage" > "${stagefile}"
    fi

    if grep -q "cleared_storage" "${stagefile}"; then
        info "Starting docker afresh at ${docker_root}"
        systemctl enable --now -q docker.socket docker containerd
        sleep 5 # give docker some time to initialize the storage directory
        download_docker_images
        echo -e "downloaded_images" > "${stagefile}"
        echo -e "\nThe server has to be rebooted for docker to initialize properly. After reboot,"
        echo -e "run 'cloudron-support --recreate-docker' again.\n" # else docker network is not completely functional
        ask_reboot
    fi

    # final stage: re-enable box and rebuild all containers
    if grep -q "downloaded_images" "${stagefile}"; then
        systemctl enable -q box
        do_recreate_containers
    fi

    rm "${stagefile}"
}
|
|
|
|
function fix_docker_version() {
    # Re-installs the docker/containerd versions pinned by the Cloudron
    # installer. Useful when 'apt upgrade' or unattended-upgrades moved docker
    # to a version the platform does not support.
    local ubuntu_codename ubuntu_version docker_version containerd_version

    ubuntu_codename=$(lsb_release -cs)
    ubuntu_version=$(lsb_release -rs)

    # installer.sh is the source of truth for the pinned versions
    docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    containerd_version="$(sed -ne 's/readonly containerd_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"

    # bail out early instead of constructing broken download URLs
    if [[ -z "${docker_version}" || -z "${containerd_version}" ]]; then
        echo "Unable to determine pinned docker/containerd versions from /home/yellowtent/box/scripts/installer.sh"
        exit 1
    fi

    echo "downloading docker ${docker_version}"
    # copied from installer.sh
    curl --fail -sL "https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64/containerd.io_${containerd_version}-1_amd64.deb" -o /tmp/containerd.deb
    curl --fail -sL "https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64/docker-ce-cli_${docker_version}-1~ubuntu.${ubuntu_version}~${ubuntu_codename}_amd64.deb" -o /tmp/docker-ce-cli.deb
    curl --fail -sL "https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64/docker-ce_${docker_version}-1~ubuntu.${ubuntu_version}~${ubuntu_codename}_amd64.deb" -o /tmp/docker.deb

    echo "installing docker"
    apt install -y --allow-downgrades /tmp/containerd.deb /tmp/docker-ce-cli.deb /tmp/docker.deb
    rm /tmp/containerd.deb /tmp/docker-ce-cli.deb /tmp/docker.deb
}
|
|
|
|
function check_db_migrations() {
    # Compares the newest migration recorded in the database with the newest
    # migration file shipped with the box code. A mismatch means the box code
    # was updated but its DB migrations were not (yet) applied.
    local last_migration_from_db last_migration_file

    # separate declaration from assignment so mysql failures are not masked;
    # otherwise a down/unreachable mysql would be misreported as "pending migrations"
    if ! last_migration_from_db="$(mysql -NB -uroot -ppassword -e "SELECT name FROM box.migrations ORDER BY run_on DESC, name DESC LIMIT 1" 2>/dev/null)" || [[ -z "${last_migration_from_db}" ]]; then
        fail "Could not read the last migration from the database. Is the mysql server running?"
        return
    fi
    # db-migrate stores names with a leading '/' and no extension; normalize
    # both sides to '/<name>.js' so they compare directly
    last_migration_from_db="${last_migration_from_db}.js"
    last_migration_file="/$(ls --ignore schema.sql --ignore package.json --ignore initial-schema.sql /home/yellowtent/box/migrations/ | sort | tail -1)"

    if [[ "${last_migration_from_db}" != "${last_migration_file}" ]]; then
        fail "Database migrations are pending. Last migration in DB: ${last_migration_from_db}. Last migration file: ${last_migration_file}."
        info "Please run 'cloudron-support --apply-db-migrations' to apply the migrations."
    else
        success "No pending database migrations"
    fi
}
|
|
|
|
function apply_db_migrations() {
    # setup/start.sh (re)configures the box and runs any pending db-migrate
    # migrations as part of its startup sequence.
    echo "Applying pending database migrations"

    # explicit if/else: the original 'cmd && success || fail' would also run
    # 'fail' if 'success' itself returned non-zero
    if bash /home/yellowtent/box/setup/start.sh; then
        success "Database migrations applied successfully"
    else
        fail "Database migrations failed"
    fi
}
|
|
|
|
function check_services() {
    # Platform services run in docker containers on static IPs. Index i in
    # each of the three arrays below describes one service.
    local -r services=("mysql" "postgresql" "mongodb" "mail" "graphite" "sftp")
    local -r service_ip=("172.18.30.1" "172.18.30.2" "172.18.30.3" "172.18.30.4" "172.18.30.5" "172.18.30.6")
    local -r service_port=("3000" "3000" "3000" "3000" "2003" "3000")

    local i service_name service_state response
    for i in "${!services[@]}"; do
        service_name="${services[$i]}"

        # 'docker inspect' fails when the container does not exist at all
        if ! service_state="$(docker inspect "${service_name}" --format '{{.State.Status}}' 2>/dev/null)"; then
            service_state="missing"
        fi

        if [[ "${service_state}" != "running" ]]; then
            # database services are stopped on demand when no app uses them
            if [[ "${service_state}" == "exited" ]] && [[ "${service_name}" == "mysql" || "${service_name}" == "postgresql" || "${service_name}" == "mongodb" ]]; then
                warn "Service '${service_name}' is not running (may be lazy-stopped)"
            else
                fail "Service '${service_name}' container is not running (state: ${service_state})!"
            fi
            continue
        fi

        # avoid nc since it is not part of the base install
        if ! timeout 5 bash -c "</dev/tcp/${service_ip[$i]}/${service_port[$i]}" 2>/dev/null; then
            fail "Service '${service_name}' is not reachable"
            continue
        fi

        if [[ "${service_name}" != "graphite" ]]; then
            response="$(curl --fail -s "http://${service_ip[$i]}:${service_port[$i]}/healthcheck")" || response=""
            if ! grep -q "true" <<< "${response}"; then
                fail "Service '${service_name}' healthcheck failed"
                continue
            fi
        else
            # graphite has no /healthcheck route; probe its dashboard instead
            response="$(curl --fail -s "http://${service_ip[$i]}:8000/graphite-web/dashboard")" || response=""
            if ! grep -q "Graphite Dashboard" <<< "${response}"; then
                fail "Service '${service_name}' healthcheck failed"
                continue
            fi
        fi

        success "Service '${service_name}' is running and healthy"
    done
}
|
|
|
|
disable_ipv6_persistent() {
    # Persistently disables IPv6 on all interfaces via a sysctl drop-in.
    # Undone by reenable_ipv6_persistent (--reenable-ipv6).
    local -r conf_file="/etc/sysctl.d/99-disable-ipv6.conf"

    printf '%s\n' \
        'net.ipv6.conf.all.disable_ipv6 = 1' \
        'net.ipv6.conf.default.disable_ipv6 = 1' \
        'net.ipv6.conf.lo.disable_ipv6 = 1' > "${conf_file}"

    # apply immediately without a reboot
    sysctl --system
}
|
|
|
|
reenable_ipv6_persistent() {
    # Removes the drop-in created by disable_ipv6_persistent and reloads all
    # sysctl settings so the kernel defaults take effect again.
    local -r conf_file="/etc/sysctl.d/99-disable-ipv6.conf"

    rm -f -- "${conf_file}"
    sysctl --system
}
|
|
|
|
check_disk_space

# 'enable-ssh' and 'admin-login' are legacy aliases for 'enable-remote-support'
# and 'owner-login'. 'enable-ssh' must be listed here too, otherwise getopt
# rejects the flag before its case arm can ever run.
args=$(getopt -o "" -l "admin-login,enable-ssh,disable-dnssec,enable-remote-support,disable-remote-support,help,owner-login,recreate-containers,recreate-docker,fix-docker-version,send-diagnostics,unbound-forward-dns:,troubleshoot,check-db-migrations,apply-db-migrations,check-services,disable-ipv6,reenable-ipv6" -n "$0" -- "$@")
eval set -- "${args}"

# each recognized action exits; --help and '--' (no action) break out and
# fall through to printing the help message below
while true; do
    case "$1" in
    --enable-ssh)
        # legacy alias, fall through
        ;&
    --enable-remote-support) enable_remote_support; exit 0;;
    --disable-remote-support) disable_remote_support; exit 0;;
    --admin-login)
        # legacy alias, fall through
        ;&
    --owner-login) owner_login; exit 0;;
    --send-diagnostics) send_diagnostics; exit 0;;
    --troubleshoot) troubleshoot; exit 0;;
    --disable-dnssec) disable_dnssec; exit 0;;
    --unbound-forward-dns) unbound_forward_dns "$2"; exit 0;;
    --recreate-containers) recreate_containers; exit 0;;
    --recreate-docker) recreate_docker; exit 0;;
    --fix-docker-version) fix_docker_version; exit 0;;
    --check-db-migrations) check_db_migrations; exit 0;;
    --apply-db-migrations) apply_db_migrations; exit 0;;
    --check-services) check_services; exit 0;;
    --disable-ipv6) disable_ipv6_persistent; exit 0;;
    --reenable-ipv6) reenable_ipv6_persistent; exit 0;;
    --help) break;;
    --) break;;
    *) echo "Unknown option $1"; exit 1;;
    esac
done

echo -e "${HELP_MESSAGE}"