2018-11-07 17:37:16 +01:00
#!/bin/bash

# Strict mode: stop on command failures, on unset variables and on a failure
# in any stage of a pipeline.
set -eu -o pipefail

# This script manipulates system services and files, so it must run as root.
if (( EUID != 0 )); then
    echo "This script should be run as root. Run with sudo"
    exit 1
fi
2023-12-14 17:04:45 +01:00
# ANSI escape sequences used by the status helpers (interpreted by `echo -e`).
readonly RED='\033[31m'
readonly GREEN='\033[32m'
readonly YELLOW='\033[33m'
readonly BOLD='\033[1m'
readonly DONE='\033[m'

# Paste service that --send-diagnostics uploads the collected log to.
readonly PASTEBIN="https://paste.cloudron.io"

# Section separator for the diagnostics log (the \n are expanded by `echo -e`).
readonly LINE="\n========================================================\n"

# Usage text. Only ${PASTEBIN} is expanded inside it.
readonly HELP_MESSAGE="
Cloudron Support and Diagnostics Tool

See https://docs.cloudron.io/troubleshooting for more information on troubleshooting.

Options:
  --disable-dnssec            Disable DNSSEC
  --enable-remote-support     Enable SSH Remote Access for the Cloudron support team
  --disable-remote-support    Disable SSH Remote Access for the Cloudron support team
  --fix-docker-version        Ensures the correct docker version is installed
  --owner-login               Login as owner
  --patch                     Apply a patch from git. WARNING: Do not use unless you know what you are doing!
  --recreate-containers       Deletes all existing containers and recreates them without loss of data
  --recreate-docker           Deletes docker storage (containers and images) and recreates it without loss of data
  --send-diagnostics          Collects server diagnostics and uploads it to ${PASTEBIN}
  --troubleshoot              Dashboard down? Run tests to identify the potential problem
  --unbound-use-external-dns  Forwards all Unbound requests to Google (8.8.8.8) and Cloudflare (1.1.1.1) DNS servers.
                              Unbound is the internal DNS server used for recursive DNS queries. This is only needed
                              if your network does not allow outbound DNS requests.
  --help                      Show this message
"
2022-04-28 11:31:18 +02:00
2023-12-14 17:04:45 +01:00
# Prints a green "[OK]" status line followed by the message in $1.
# Backslash escapes in the message are interpreted.
function success() {
    printf '%b\n' "[${GREEN}OK${DONE}]\t${1}"
}
# Prints an informational message ($1) indented by one tab, so it lines up
# with the text of the tagged status lines.
function info() {
    printf '%b\n' "\t${1}"
}
# Prints a yellow "[WARN]" status line followed by the message in $1.
function warn() {
    printf '%b\n' "[${YELLOW}WARN${DONE}]\t${1}"
}
# Prints a red "[FAIL]" status line followed by the message in $1 to stderr.
function fail() {
    printf '%b\n' "[${RED}FAIL${DONE}]\t${1}" >&2
}
2024-11-08 16:01:30 +01:00
# Shows the SSH access disclaimer and, once the user accepts it, appends the
# Cloudron support public key to the cloudron-support user's authorized_keys.
# Exits the script with status 1 when the terms are not accepted.
function enable_remote_support() {
    local -r support_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r support_user="cloudron-support"
    local -r authorized_keys="/home/cloudron-support/.ssh/authorized_keys"

    echo -e "
================= ${BOLD}SSH ACCESS DISCLAIMER${DONE} =================
By granting us SSH or remote access to your systems, you acknowledge and
agree to the following:
1. ${BOLD}Access to Customer Data${DONE}
Our team may have visibility into customer data during the course of
investigating or resolving issues. While we take all reasonable steps to
respect your privacy and handle data securely, you acknowledge that such
access may occur as part of the support process.
2. ${BOLD}No Liability for Data Loss or System Changes${DONE}
Although we strive to exercise caution and due diligence, you acknowledge
and accept that:
${BOLD}-${DONE} There is an inherent risk of data loss, corruption, or system
disruption during troubleshooting or configuration changes.
${BOLD}-${DONE} We shall not be held liable for any loss of data, service
downtime, or unintended consequences arising from our access or any
actions taken during the support process.
3. ${BOLD}Backups and Safeguards${DONE}
You are solely responsible for ensuring that up-to-date and complete
backups of your systems and data exist prior to granting us access.
4. ${BOLD}Local Changes and Auto-Updates${DONE}
Your system may receive automatic updates as part of regular maintenance or
feature releases. Any local modifications or patches applied during support
may be overwritten by future updates. ${BOLD}-${DONE} It is the customer's
responsibility to track such changes and reapply them if necessary, or to
coordinate with us for permanent integration where applicable.
5. ${BOLD}Consent to Proceed${DONE}
By providing access, you confirm that you have read, understood, and agreed
to the terms above and expressly authorize us to proceed with accessing
your systems for support purposes.
=======================================================================
"

    # Anything other than a single y/Y (including just pressing enter) declines.
    read -p "Do you accept these terms? [y/N] " choice
    choice=${choice:-n}
    if [[ ! $choice =~ ^[Yy]$ ]]; then
        exit 1
    fi

    echo -n "Terms accepted. Enabling Remote Access for the Cloudron support team..."

    mkdir -p "$(dirname "${authorized_keys}")" # .ssh does not exist sometimes
    touch "${authorized_keys}" # required for concat to work

    # Key already installed: nothing to append.
    if grep -q "${support_key}" "${authorized_keys}"; then
        echo "Done"
        return
    fi

    echo -e "\n${support_key}" >> "${authorized_keys}"
    chmod 600 "${authorized_keys}"
    chown "${support_user}" "${authorized_keys}"
    echo "Done"
}
2022-04-28 11:31:18 +02:00
2025-07-11 10:33:22 +02:00
# Removes the Cloudron support public key from the cloudron-support user's
# authorized_keys file, if it is present.
function disable_remote_support() {
    local -r support_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r support_user="cloudron-support"
    local -r authorized_keys="/home/cloudron-support/.ssh/authorized_keys"

    echo -n "Disabling Remote Access for the Cloudron support team..."

    mkdir -p "$(dirname "${authorized_keys}")" # .ssh does not exist sometimes
    touch "${authorized_keys}" # required for del below to work

    # The key is used as a sed address pattern; it contains no '/' delimiter.
    if grep -q "${support_key}" "${authorized_keys}"; then
        sed "/${support_key}/d" -i "${authorized_keys}"
    fi

    echo "Done"
}
2024-04-22 17:43:11 +02:00
# Checks that a systemd service has been up for more than 10 seconds, to
# tell a healthy service apart from one that keeps restarting.
# Arguments: $1 - systemd unit name
# Returns:   0 once the unit has been active for more than 10 seconds,
#            1 if that is not the case after 3 attempts (11s apart).
function wait_systemd_service() {
    local -r service="$1"
    local attempt entered_at started_secs now_secs up_time

    for attempt in 1 2 3; do
        entered_at=$(systemctl show "${service}" -p ActiveEnterTimestamp | sed 's/ActiveEnterTimestamp=//g')

        # An empty timestamp means the unit is not active at the moment.
        # `date --date=""` would parse that as midnight and make the service
        # look like it has been up for hours, so treat empty or unparsable
        # values as "not up yet" and retry.
        if [[ -n "${entered_at}" ]] && started_secs=$(date '+%s' --date="${entered_at}" 2>/dev/null); then
            now_secs=$(date '+%s')
            up_time=$(( now_secs - started_secs ))
            if (( up_time > 10 )); then
                return 0
            fi
            info "Service '${service}' just started $up_time secs ago, checking health again in 10s"
        else
            info "Service '${service}' is not active yet, checking health again in 10s"
        fi

        sleep 11
    done

    return 1
}
2023-12-14 16:53:34 +01:00
# Verifies that the host MySQL service is active and stable, restarting it
# once if it is down. Exits the script with status 1 when it stays down or
# keeps restarting.
function check_host_mysql() {
    local -r unit="mysql"
    local -r hint="please investigate the error by inspecting /var/log/mysql/error.log"

    if ! systemctl is-active -q "${unit}"; then
        info "MySQL is down. Trying to restart MySQL ..."
        systemctl restart "${unit}"
        systemctl is-active -q "${unit}" || { fail "MySQL is still down, ${hint}"; exit 1; }
    fi

    wait_systemd_service "${unit}" || { fail "MySQL keeps restarting, ${hint}"; exit 1; }

    success "MySQL is running"
}
2023-12-15 15:45:29 +01:00
# Verifies that the box service is active and stable. When it is down, the
# setup/start.sh script is re-run and the service is stopped and started
# again. Exits the script with status 1 when box stays down or keeps
# restarting.
function check_box() {
    local -r version_file="/home/yellowtent/box/VERSION"
    local -r hint="please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log"

    # Fall back to a placeholder when the VERSION file is missing or unreadable.
    if ! { [[ -f "${version_file}" ]] && version=$(cat "${version_file}"); }; then
        version='<unknown>'
    fi

    if ! systemctl is-active -q box; then
        info "box v${version} is down. re-running migration script and restarting it ..."
        /home/yellowtent/box/setup/start.sh
        systemctl stop box # a restart sometimes doesn't restart, no idea
        systemctl start box
        systemctl is-active -q box || { fail "box service is still down, ${hint}"; exit 1; }
    fi

    wait_systemd_service box || { fail "box service keeps restarting, ${hint}"; exit 1; }

    success "box v${version} is running"
}
2024-07-01 08:24:01 +02:00
# Checks that `netplan get all` works. An empty configuration only produces
# a warning; a failing netplan command exits the script with status 1.
function check_netplan() {
    output=$(netplan get all 2>/dev/null) || { fail "netplan is not working"; exit 1; }

    if [[ -n "${output}" ]]; then
        success "netplan is good"
    else
        warn "netplan configuration is empty. this might be OK depending on your networking setup"
    fi
}
2023-12-13 16:43:12 +01:00
# Generates a one-time password for the first active owner account by
# writing a 'ghosts_config' entry into box.settings, then prints the login
# URL and credentials. Exits with status 1 when the owner account or the
# dashboard domain cannot be read from the database.
function owner_login() {
    check_host_mysql >/dev/null

    local owner_username owner_password dashboard_domain

    # A failing query must not abort silently under `set -e`; the empty
    # result is reported explicitly below instead.
    owner_username=$(mysql -NB -uroot -ppassword -e "SELECT username FROM box.users WHERE role='owner' AND username IS NOT NULL AND active=1 ORDER BY creationTime LIMIT 1" 2>/dev/null) || true
    if [[ -z "${owner_username}" ]]; then
        fail "Could not find an active owner account in the database"
        exit 1
    fi

    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
    if [[ -z "${dashboard_domain}" ]]; then
        fail "Could not determine the dashboard domain from the database"
        exit 1
    fi

    owner_password=$(pwgen -1s 12)

    mysql -NB -uroot -ppassword -e "INSERT INTO box.settings (name, value) VALUES ('ghosts_config', '{\"${owner_username}\":\"${owner_password}\"}') ON DUPLICATE KEY UPDATE name='ghosts_config', value='{\"${owner_username}\":\"${owner_password}\"}'" 2>/dev/null

    echo "Login at https://my.${dashboard_domain} as ${owner_username} / ${owner_password} . This password may only be used once."
}
2022-04-28 11:31:18 +02:00
2023-12-13 16:58:30 +01:00
# Collects system diagnostics into /tmp/cloudron-support.log, uploads the
# log to ${PASTEBIN} and prints the link to send to support.
#
# Collection is best-effort: this is typically run on a broken system, so a
# failing collector (for example `systemctl status` returning non-zero
# because a unit is down or missing) must not abort the run under `set -e`
# before anything is uploaded. Command output, errors included, still ends
# up in the log.
function send_diagnostics() {
    local -r log="/tmp/cloudron-support.log"
    local cloudron_version dashboard_domain has_ipv6 paste_key

    echo -n "Generating Cloudron Support stats..."

    rm -rf "${log}"

    echo -e "${LINE}Linux${LINE}" >> "${log}"
    uname -nar &>> "${log}" || true

    echo -e "${LINE}Ubuntu${LINE}" >> "${log}"
    lsb_release -a &>> "${log}" || true

    echo -e "${LINE}Cloudron${LINE}" >> "${log}"
    cloudron_version=$(cat /home/yellowtent/box/VERSION || true)
    echo -e "Cloudron version: ${cloudron_version}" >> "${log}"
    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null || true)
    echo -e "Dashboard domain: ${dashboard_domain}" >> "${log}"

    # docker and df can hang on a broken system, hence the timeouts
    echo -e "${LINE}Docker${LINE}" >> "${log}"
    if ! timeout --kill-after 10s 15s docker system info &>> "${log}"; then
        echo -e "Docker (system info) is not responding" >> "${log}"
    fi

    if ! timeout --kill-after 10s 15s docker ps -a &>> "${log}"; then
        echo -e "Docker (ps) is not responding" >> "${log}"
    fi

    echo -e "${LINE}Filesystem stats${LINE}" >> "${log}"
    if ! timeout --kill-after 10s 15s df -h &>> "${log}"; then
        echo -e "df is not responding" >> "${log}"
    fi

    echo -e "${LINE}Appsdata stats${LINE}" >> "${log}"
    du -hcsL /home/yellowtent/appsdata/* &>> "${log}" || true

    echo -e "${LINE}Boxdata stats${LINE}" >> "${log}"
    du -hcsL /home/yellowtent/boxdata/* &>> "${log}" || true

    echo -e "${LINE}Backup stats (possibly misleading)${LINE}" >> "${log}"
    du -hcsL /var/backups/* &>> "${log}" || true

    # systemctl status returns non-zero when any listed unit is inactive or unknown
    echo -e "${LINE}System daemon status${LINE}" >> "${log}"
    systemctl status --lines=100 box mysql unbound cloudron-syslog nginx collectd docker &>> "${log}" || true

    echo -e "${LINE}Box logs${LINE}" >> "${log}"
    tail -n 100 /home/yellowtent/platformdata/logs/box.log &>> "${log}" || true

    echo -e "${LINE}Interface Info${LINE}" >> "${log}"
    ip addr &>> "${log}" || true

    echo -e "${LINE}Firewall chains${LINE}" >> "${log}"
    iptables -L &>> "${log}" || true

    if cat /proc/net/if_inet6 >/dev/null 2>&1; then
        has_ipv6="yes"
    else
        has_ipv6="no"
    fi
    echo -e "IPv6: ${has_ipv6}" >> "${log}"
    if [[ "${has_ipv6}" == "yes" ]]; then
        ip6tables -L &>> "${log}" || true
    fi

    echo "Done"

    echo -n "Uploading information..."
    # the paste service answers with a JSON document containing the paste key
    if ! paste_key=$(curl -X POST "${PASTEBIN}/documents" --silent --data-binary "@${log}" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])" 2>/dev/null); then
        echo "Failed"
        fail "Could not upload diagnostics to ${PASTEBIN}. Please email the contents of ${log} to support@cloudron.io instead"
        exit 1
    fi
    echo "Done"

    echo -e "\nPlease email the following link to support@cloudron.io : ${PASTEBIN}/${paste_key}"
}
2024-06-26 22:36:58 +02:00
# Checks that the system resolver can resolve cloudron.io. When it cannot,
# prints hints about the systemd-resolved setup and exits with status 1.
function check_dns() {
    local -r stub_path="/run/systemd/resolve/stub-resolv.conf"

    if host cloudron.io &>/dev/null; then
        success "DNS is resolving via systemd-resolved"
        return
    fi

    # Resolution failed: report everything that deviates from the expected
    # systemd-resolved configuration before failing.
    systemctl is-active -q systemd-resolved || warn "systemd-resolved is not in use. see 'systemctl status systemd-resolved'"

    if [[ ! -L /etc/resolv.conf ]]; then
        warn "/etc/resolv.conf is not symlinked to '..${stub_path}'"
    else
        target=$(readlink /etc/resolv.conf)
        [[ "$target" == *"${stub_path}" ]] || warn "/etc/resolv.conf is symlinked to $target instead of '..${stub_path}'"
    fi

    grep -q "^nameserver 127.0.0.53" /etc/resolv.conf || warn "/etc/resolv.conf is not using systemd-resolved. it is missing the line 'nameserver 127.0.0.53'"

    fail "DNS is not resolving"
    host cloudron.io || true
    exit 1
}
2023-12-14 16:53:34 +01:00
# Verifies that unbound is active, stable and able to resolve cloudron.io
# when queried at 127.0.0.150. The service is restarted once if it is down.
# Exits the script with status 1 on any failure.
function check_unbound() {
    local -r journal_hint="please investigate the error using 'journalctl -u unbound'"

    if ! systemctl is-active -q unbound; then
        info "unbound is down. restarting to see if it fixes it" # unbound-anchor is part of ExecStartPre
        systemctl restart unbound
        systemctl is-active -q unbound || { fail "unbound is still down, ${journal_hint}"; exit 1; }
    fi

    wait_systemd_service unbound || { fail "unbound service keeps restarting, ${journal_hint}"; exit 1; }

    if host cloudron.io 127.0.0.150 &>/dev/null; then
        success "unbound is running"
        return
    fi

    fail "Unbound is not resolving, maybe try forwarding all DNS requests. You can do this by running 'cloudron-support --unbound-use-external-dns' option"
    host cloudron.io 127.0.0.150 # run again, this time showing the error
    exit 1
}
2024-01-29 13:36:53 +01:00
# Checks that the TLS certificate referenced by the dashboard's nginx config
# has not expired (it must stay valid for at least another 100 seconds).
# Exits the script with status 1 when the config or certificate cannot be
# found, or when the certificate has expired.
function check_dashboard_cert() {
    local dashboard_domain nginx_conf_file cert_file cert_expiry_date task_id

    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
    nginx_conf_file="/home/yellowtent/platformdata/nginx/applications/dashboard/my.${dashboard_domain}.conf"

    # Report a missing config or certificate as such. Without these checks
    # the openssl call below fails and the problem is misreported as an
    # expired certificate with an empty expiry date.
    if [[ ! -f "${nginx_conf_file}" ]]; then
        fail "Dashboard nginx config ${nginx_conf_file} does not exist"
        exit 1
    fi

    cert_file=$(sed -n -e 's/.*ssl_certificate [[:space:]]\+\(.*\);/\1/p' "${nginx_conf_file}") || true
    if [[ -z "${cert_file}" || ! -f "${cert_file}" ]]; then
        fail "Could not find the certificate file referenced by ${nginx_conf_file}"
        exit 1
    fi

    if ! openssl x509 -checkend 100 -noout -in "${cert_file}" >/dev/null 2>&1; then
        cert_expiry_date=$(openssl x509 -enddate -noout -in "${cert_file}" | sed -e 's/notAfter=//') || true
        fail "Certificate has expired. Certificate expired at ${cert_expiry_date}"
        task_id=$(mysql -NB -uroot -ppassword -e "SELECT id FROM box.tasks WHERE type='checkCerts' ORDER BY id DESC LIMIT 1" 2>/dev/null) || true
        echo -e "\tPlease check /home/yellowtent/platformdata/logs/tasks/${task_id}.log for last cert renewal logs"
        echo -e "\tCommon issues include expiry of domain's API key OR incoming http port 80 not being open"
        exit 1
    fi

    success "dashboard cert is valid"
}
2023-12-14 16:53:34 +01:00
# Verifies that nginx can reload its configuration and is stable. When the
# reload fails, stale dashboard configs and configs that reference missing
# certificate or key files are removed and nginx is restarted. Exits the
# script with status 1 when nginx stays down or keeps restarting.
function check_nginx() {
    local -r nginx_dir="/home/yellowtent/platformdata/nginx"
    local -r dashboard_conf_dir="${nginx_dir}/applications/dashboard"
    local dashboard_domain conf cert_file key_file

    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true

    # it is possible nginx is running but can't be restarted
    if ! systemctl reload -q nginx; then
        fail "nginx is down. Removing extraneous dashboard domain configs ..."

        # we had a bug where old dashboard domain config file was kept around.
        # With an unknown domain the filter below would match nothing and
        # delete the current dashboard config too, so skip the cleanup then.
        # find is given the absolute path so the script's working directory
        # is left alone.
        if [[ -n "${dashboard_domain}" && -d "${dashboard_conf_dir}" ]]; then
            find "${dashboard_conf_dir}" ! -name "my.${dashboard_domain}.conf" -type f -exec rm -f {} +
        else
            warn "Could not determine the dashboard domain or config directory. Not removing any dashboard configs"
        fi

        # check if certificates are there. nginx will still start if certs are expired
        # IFS= makes sure it doesn't trim leading and trailing whitespace
        # -r prevents interpretation of \ escapes.
        find "${nginx_dir}" -type f -name '*.conf' -print0 | while IFS= read -r -d '' conf; do
            cert_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate[[:blank:]]\+\(.*\);/\1/p' "${conf}")
            key_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate_key[[:blank:]]\+\(.*\);/\1/p' "${conf}")

            if [[ -n "${cert_file}" && ! -f "${cert_file}" ]]; then
                info "${cert_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi

            if [[ -n "${key_file}" && ! -f "${key_file}" ]]; then
                info "${key_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi
        done

        systemctl restart nginx

        if ! systemctl is-active -q nginx; then
            fail "nginx is still down, please investigate the error by inspecting 'journalctl -u nginx' and /var/log/nginx/error.log"
            exit 1
        fi
    fi

    if ! wait_systemd_service nginx; then
        fail "nginx service keeps restarting, please investigate the error using 'journalctl -u nginx' and /var/log/nginx/error.log"
        exit 1
    fi

    success "nginx is running"
}
2024-01-29 13:36:53 +01:00
# this confirms that https works properly without any proxy (cloudflare) involved
# Loads the dashboard over https with the dashboard hostname pinned to
# 127.0.0.1. Exits the script with status 1 when the page cannot be loaded.
function check_dashboard_site_loopback() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r dashboard_fqdn="my.${dashboard_domain}"

    curl --fail -s --resolve "${dashboard_fqdn}:443:127.0.0.1" "https://${dashboard_fqdn}" >/dev/null || {
        fail "Could not load dashboard website with loopback check"
        exit 1
    }

    success "dashboard is reachable via loopback"
}
2024-01-25 14:49:10 +01:00
# Compares the installed node version with the one pinned in installer.sh
# and prints repair instructions on mismatch (exits with status 1).
#
# NOTE: this file defines check_node a second time further down; the later
# definition is the one in effect when the function is called.
function check_node() {
    local expected_node_version current_node_version

    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"

    # A missing node binary is reported as a mismatch instead of aborting the
    # script through `set -e` / pipefail.
    if command -v node &> /dev/null; then
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix
    else
        current_node_version="<not found>"
    fi

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo " ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo " ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        echo " apt remove -y nodejs"
        echo " systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}
2024-09-19 12:11:56 +02:00
# Prints, for every network interface whose name does not start with one of
# the excluded prefixes (lo, veth, docker, ...), the sysctl command that
# disables IPv6 on it and the matching /etc/sysctl.conf line.
function print_ipv6_disable_howto() {
    local candidates

    # List the interfaces once; both loops below print the same set.
    candidates=$(ls /sys/class/net | grep -vE '^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)') || true

    echo "Instead of disabling IPv6 globally, you can disable it at an interface level."
    for iface in ${candidates}; do
        echo -e "\tsysctl -w net.ipv6.conf.${iface}.disable_ipv6=1"
    done

    echo "For the above configuration to persist across reboots, you have to add below to /etc/sysctl.conf"
    for iface in ${candidates}; do
        echo -e "\tnet.ipv6.conf.${iface}.disable_ipv6=1"
    done
}
2024-09-06 17:20:52 +02:00
# Checks the IPv6 state of the server:
#   - fails when IPv6 is disabled in the kernel,
#   - succeeds when no interface has a non-link-local (fe80) IPv6 address,
#   - otherwise requires api.cloudron.io to answer a ping over IPv6.
# Exits the script with status 1 on failure.
function check_ipv6() {
    ipv6_disable=$(< /sys/module/ipv6/parameters/disable)
    if [[ "${ipv6_disable}" == "1" ]]; then
        fail "IPv6 is disabled in kernel. Cloudron requires IPv6 in kernel"
        print_ipv6_disable_howto
        exit 1
    fi

    # check if server has IPv6 address
    has_ipv6_address=0
    for iface in $(ls /sys/class/net | grep -vE '^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)'); do
        # the pipeline fails when the interface has nothing but fe80 addresses
        ipv6=$(ip -6 addr show dev ${iface} | grep -o 'inet6 [^ ]*' | awk '{print $2}' | grep -v '^fe80') || continue
        if [[ -n "${ipv6}" ]]; then
            has_ipv6_address=1
        fi
    done

    if [[ "${has_ipv6_address}" == "0" ]]; then
        success "IPv6 is enabled in kernel. No public IPv6 address"
        return
    fi

    if ping6 -q -c 1 api.cloudron.io >/dev/null 2>&1; then
        success "IPv6 is enabled and public IPv6 address is working"
        return
    fi

    fail "Server has an IPv6 address but api.cloudron.io is unreachable via IPv6 (ping6 -q -c 1 api.cloudron.io)"
    print_ipv6_disable_howto
    exit 1
}
2023-12-14 16:53:34 +01:00
# Verifies that the docker service is active and stable, restarting it once
# if it is down. Exits the script with status 1 when it stays down or keeps
# restarting.
function check_docker() {
    local -r unit="docker"
    local -r hint="please investigate the error using 'journalctl -u docker'"

    if ! systemctl is-active -q "${unit}"; then
        info "Docker is down. Trying to restart docker ..."
        systemctl restart "${unit}"
        systemctl is-active -q "${unit}" || { fail "Docker is still down, ${hint}"; exit 1; }
    fi

    wait_systemd_service "${unit}" || { fail "Docker keeps restarting, ${hint}"; exit 1; }

    success "docker is running"
}
2024-10-17 12:41:33 +02:00
# Compares the docker client version with the one pinned in installer.sh.
# On mismatch (or when docker is not installed) it points the user to
# --fix-docker-version and exits with status 1.
function check_docker_version() {
    expected_docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"

    current_docker_version="<not found>"
    if command -v docker &> /dev/null; then
        current_docker_version="$(docker version --format '{{.Client.Version}}')"
    fi

    if [[ "${current_docker_version}" == "${expected_docker_version}" ]]; then
        success "docker version is correct"
        return
    fi

    fail "docker version is incorrect. Expecting ${expected_docker_version}. Got ${current_docker_version}."
    echo "Run cloudron-support --fix-docker-version"
    exit 1
}
2024-01-29 13:36:53 +01:00
# Compares the installed node version with the one pinned in installer.sh.
# On mismatch (or when node is not installed) it prints repair instructions
# and exits with status 1.
function check_node() {
    local expected_node_version current_node_version

    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"

    if command -v node &> /dev/null; then
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix
    else
        current_node_version="<not found>"
    fi

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo " ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo " ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        # same hint as the other check_node definition in this file prints
        echo " apt remove -y nodejs"
        echo " systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}
# Loads https://my.<dashboard domain> through normal DNS resolution and
# checks that the response is the Cloudron dashboard. Exits the script with
# status 1 when the page cannot be loaded or is not the dashboard.
function check_dashboard_site_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r domain_provider=$(mysql -NB -uroot -ppassword -e "SELECT provider FROM box.domains WHERE domain='${dashboard_domain}'" 2>/dev/null)
    local output

    # TODO: check ipv4 and ipv6 separately
    if ! output=$(curl --fail --connect-timeout 10 --max-time 20 -s "https://my.${dashboard_domain}"); then
        fail "Could not load dashboard domain."
        if [[ "${domain_provider}" == "cloudflare" ]]; then
            echo "Maybe cloudflare proxying is not working. Delete the domain in Cloudflare dashboard and re-add it. This sometimes re-establishes the proxying"
        else
            echo "Hairpin NAT is not working. Please check if your router supports it"
        fi
        exit 1
    fi

    # Match within the shell instead of `echo | grep -q`: under pipefail the
    # early exit of `grep -q` can kill echo with SIGPIPE on a large page and
    # turn a successful match into a failure.
    if [[ "${output}" != *"Cloudron Dashboard"* ]]; then
        fail "https://my.${dashboard_domain} is not the dashboard domain. Check if DNS is set properly to this server"
        host "my.${dashboard_domain}" 127.0.0.53 || true # could also result in cloudflare
        exit 1
    fi

    success "Dashboard is reachable via domain name"
}
# Uses whois to check whether the dashboard domain registration has expired.
# The check is skipped (info/warn only) when whois is not installed, when it
# reports no expiry date, or when the date cannot be parsed. Exits the
# script with status 1 when the domain appears to be expired.
function check_expired_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local expdate expdate_secs curdate_secs

    if ! command -v whois &> /dev/null; then
        info "Domain ${dashboard_domain} expiry check skipped because whois is not installed. Run 'apt install whois' to check"
        return
    fi

    # The last field of the first matching line is taken as the expiry date.
    # A non-matching grep fails the pipeline under pipefail; that case is
    # handled through the empty result below.
    expdate=$(whois "${dashboard_domain}" | grep -E -i 'Expiration Date:|Expires on|Expiry Date:' | head -1 | awk '{print $NF}') || true
    if [[ -z "${expdate}" ]]; then
        warn "Domain ${dashboard_domain} expiry check skipped because whois does not have this information"
        return
    fi

    # An unparsable date must not fall through to the "valid" verdict.
    if ! expdate_secs=$(date -d"${expdate}" +%s 2>/dev/null); then
        warn "Domain ${dashboard_domain} expiry check skipped because the expiry date '${expdate}' could not be parsed"
        return
    fi

    curdate_secs="$(date +%s)"
    if (( curdate_secs > expdate_secs )); then
        fail "Domain ${dashboard_domain} appears to be expired"
        exit 1
    fi

    success "Domain ${dashboard_domain} is valid and has not expired"
}
2025-03-05 18:02:28 +01:00
# Writes an unbound drop-in config that forwards the root zone to 1.1.1.1
# and 8.8.8.8, then restarts unbound.
function unbound_use_external_dns() {
    local -r conf_file="/etc/unbound/unbound.conf.d/forward-everything.conf"

    info "To remove the forwarding, please delete $conf_file and 'systemctl restart unbound'"

    # one config line per argument
    printf '%s\n' \
        'forward-zone:' \
        'name: "."' \
        'forward-addr: 1.1.1.1' \
        'forward-addr: 8.8.8.8' > "${conf_file}"

    systemctl restart unbound

    success "Forwarded all DNS requests to Google (8.8.8.8) & Cloudflare DNS (1.1.1.1)"
}
2023-12-14 17:04:05 +01:00
# Writes an unbound drop-in config that sets val-permissive-mode, then
# restarts unbound.
function disable_dnssec() {
    local -r conf_file="/etc/unbound/unbound.conf.d/disable-dnssec.conf"

    warn "To reenable DNSSEC, please delete $conf_file and 'systemctl restart unbound'"

    # one config line per argument
    printf '%s\n' \
        'server:' \
        'val-permissive-mode: yes' > "${conf_file}"

    systemctl restart unbound

    success "DNSSEC Disabled"
}
2024-07-01 14:08:55 +02:00
# Prints a short hardware/OS summary (vendor, kernel, Ubuntu release, CPU,
# RAM, free space on /).
#
# Purely informational and the first step of troubleshoot(): each probe
# falls back to "<unknown>" so that a missing file or tool (no DMI data,
# no lsb_release, no "Model name" line in lscpu) does not abort the whole
# run through `set -e` / pipefail.
function print_system() {
    local -r unknown="<unknown>"
    local vendor product ubuntu_codename ubuntu_version linux_version
    local proc_count proc_name ram_kb disk_size

    vendor=$(cat /sys/devices/virtual/dmi/id/sys_vendor 2>/dev/null) || vendor="${unknown}"
    product=$(cat /sys/devices/virtual/dmi/id/product_name 2>/dev/null) || product="${unknown}"
    echo "Vendor: ${vendor} Product: ${product}"

    ubuntu_codename=$(lsb_release -cs 2>/dev/null) || ubuntu_codename="${unknown}"
    ubuntu_version=$(lsb_release -rs 2>/dev/null) || ubuntu_version="${unknown}"
    linux_version=$(uname -r) || linux_version="${unknown}"
    echo "Linux: ${linux_version}"
    echo "Ubuntu: ${ubuntu_codename} ${ubuntu_version}"

    # grep -c exits non-zero when it counts no "processor" lines
    proc_count=$(grep -c ^processor /proc/cpuinfo 2>/dev/null) || proc_count="${unknown}"
    proc_name=$(lscpu 2>/dev/null | grep "Model name:" | sed -r 's/Model name:\s{1,}//g') || proc_name="${unknown}"
    echo "Processor: ${proc_name} x ${proc_count}"

    ram_kb=$(grep MemTotal /proc/meminfo 2>/dev/null | awk '{print $2}') || ram_kb="${unknown}"
    echo "RAM: ${ram_kb}KB"

    disk_size=$(LC_ALL=C df -h --output=source,avail / 2>/dev/null | tail -n1) || disk_size="${unknown}"
    echo "Disk: ${disk_size}"
}
2024-07-01 08:24:01 +02:00
2024-07-01 14:08:55 +02:00
# Runs every diagnostic check in a fixed order. The order matters where one
# check depends on an earlier one (see the per-step comments).
function troubleshoot() {
    # note: disk space test has already been run globally
    local -r steps=(
        print_system
        check_node
        check_ipv6
        check_docker
        check_docker_version
        check_host_mysql
        check_nginx                     # requires mysql to be checked
        check_dashboard_cert
        check_dashboard_site_loopback   # checks website via loopback
        check_box
        check_netplan
        check_dns
        check_dashboard_site_domain     # check website via domain name
        check_expired_domain
        check_unbound                   # this is less fatal after 8.0
    )
    local step

    for step in "${steps[@]}"; do
        "${step}"
    done
}
2025-01-31 10:17:34 +01:00
# Interactively offers three ways to reclaim disk space: truncating log
# files, pruning unused docker images, and deleting files older than a day
# from /tmp and /run inside the running containers. Each step runs only
# when answered with y/Y.
function cleanup_disk_space() {
    local choice container

    read -r -p "Truncate log files to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        truncate -s0 /home/yellowtent/platformdata/logs/*/*.log
        rm -f /home/yellowtent/platformdata/logs/*.log.* # delete the log.1, log.2 etc
    fi

    read -r -p "Prune docker system resources to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        # `docker image prune` is the prune command; `docker images prune`
        # only lists images and never freed anything.
        docker image prune -fa || true
    fi

    # NOTE: despite the wording of the prompt, this step cleans old files in
    # /tmp and /run inside the running containers; it does not prune volumes.
    read -r -p "Prune docker volumes to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        for container in $(docker ps --format "{{.ID}}"); do
            docker exec "$container" find /tmp -type f -mtime +1 -delete || true
            docker exec "$container" find /run -type f -mtime +1 -delete || true
        done
    fi
}
2023-12-13 16:43:12 +01:00
# Makes sure there is enough free space to operate: at least 10mb on / (a
# cleanup is offered first when there is less) and at least 5mb on /tmp for
# the diagnostics log. Exits the script with status 1 otherwise.
function check_disk_space() {
    local -r min_root_kb="10240"
    local -r min_tmp_kb="5120"

    # check if at least 10mb root partition space is available
    if [[ "$(df --output="avail" / | sed -n 2p)" -lt "${min_root_kb}" ]]; then
        echo "No more space left on / (see df -h output)"
        cleanup_disk_space
    fi

    # measure again, the cleanup may have freed enough
    if [[ "$(df --output="avail" / | sed -n 2p)" -lt "${min_root_kb}" ]]; then
        echo "Still no space despite cleaning up. If you have backups (/var/backups) on this disk, delete old backups to free some space"
        exit 1
    fi

    # check for at least 5mb free /tmp space for the log file
    if [[ "$(df --output="avail" /tmp | sed -n 2p)" -lt "${min_tmp_kb}" ]]; then
        echo "Not enough space left on /tmp"
        echo "Free up some space first by deleting files from /tmp"
        exit 1
    fi
}
2019-06-10 09:31:34 -07:00
2024-06-14 10:57:40 +02:00
# Rewrites the "version" field of INFRA_VERSION and restarts box, then waits
# until box logs "platform is ready".
# Globals: logfile (read) - path of the box log, set by the caller.
function do_recreate_containers() {
    local line_count

    echo -e "Re-creating addon and app containers\n"
    info "Follow re-create logs in a second terminal with:"
    info "$ tail -f ${logfile}"
    echo ""

    echo -n "This takes a while ."
    # number of log lines that exist before the restart
    line_count=$(wc -l < "${logfile}")
    sed -e 's/"version": ".*",/"version":"48.0.0",/' -i /home/yellowtent/platformdata/INFRA_VERSION
    systemctl restart -q box # will re-create docker network

    # Look only at lines written after the restart. `tail -n +N` starts AT
    # line N, so the first new line is line_count + 1; without the +1 a
    # stale "platform is ready" on the last old line would end the wait at
    # once. grep runs without -q so it drains its input: under pipefail an
    # early exit could kill tail with SIGPIPE and hide the match.
    while ! tail -n "+$(( line_count + 1 ))" "${logfile}" | grep "platform is ready" >/dev/null; do
        echo -n "."
        sleep 2
    done

    echo -e "\n\nDone! Addon containers successfully re-created. The apps in the dashboard will"
    echo -e "say 'Configuring (Queued)'. They will come up in a short while.\n"
}
# Asks for confirmation and then re-creates all containers through
# do_recreate_containers. Exits the script with status 1 when the user
# declines.
function recreate_containers() {
    # read by do_recreate_containers
    readonly logfile="/home/yellowtent/platformdata/logs/box.log"

    echo "This will re-create all the containers. Apps will go down for a while. No data will be lost."
    read -p "Do you want to proceed? [y/N] " choice
    choice=${choice:-n}

    # only a single y or Y counts as consent
    case "${choice}" in
        [Yy]) ;;
        *) exit 1 ;;
    esac

    do_recreate_containers
}
# Pulls every image listed in box's infra_version.js, once by its full
# reference and once with the "@sha256:..." suffix removed.
function download_docker_images() {
    local readable_ref

    info "Downloading addon images"

    # space-separated list of image references
    images=$(node -e "const i = require('/home/yellowtent/box/src/infra_version.js'); console.log(Object.keys(i.images).map(x => i.images[x]).join(' '));")

    for image in ${images}; do
        readable_ref="${image%@sha256:*}"
        info "Pulling ${image}"
        docker pull -q "${image}" &>/dev/null # this pulls the image using the sha256
        docker pull -q "${readable_ref}" &>/dev/null # this will tag the image for readability
    done
}
# Offers to reboot the server. This function never returns to its caller:
# it exits with status 1 when the answer is not "y"/"Y", otherwise it calls reboot and exits.
function ask_reboot() {
    read -p "Do you want to reboot ? [y/N] " choice
    choice=${choice:-n} # plain <enter> means no

    case "${choice}" in
        [Yy]) ;;
        *) exit 1 ;;
    esac

    reboot
    # do not let the calling function carry on after the reboot was requested
    exit
}
# Handler for --recreate-docker: wipes the docker and containerd storage, pulls the images again and
# re-creates all containers.
#
# The work is resumable. Progress is recorded in ${stagefile}, so the command can be re-run after each
# reboot and continues where it stopped:
#   (no stage file)    ask for confirmation, stop + disable box/docker/containerd -> clearing_storage
#   clearing_storage   delete the storage directories (asks for a reboot if rm fails) -> cleared_storage
#   cleared_storage    enable + start docker, pull the images -> downloaded_images, then ask for a reboot
#   downloaded_images  enable box, re-create the containers, remove the stage file
function recreate_docker() {
    # these stay global readonly as before; do_recreate_containers reads ${logfile}
    readonly logfile="/home/yellowtent/platformdata/logs/box.log"
    readonly stagefile="/home/yellowtent/platformdata/recreate-docker-stage"
    readonly containerd_root="/var/lib/containerd"
    local docker_root choice

    # NOTE(review): on re-runs docker has been stopped and disabled by the first stage, so this detection
    # presumably fails and the default is used even if the first run saw a custom root - confirm
    if ! docker_root=$(docker info -f '{{ .DockerRootDir }}' 2>/dev/null); then
        # NOTE(review): apply_patch in this file calls 'warn'; confirm that a 'warning' helper is defined as
        # well, an unknown command here would abort the script under 'set -e'
        warning "Unable to detect docker root. Assuming /var/lib/docker"
    fi
    if [[ -z "${docker_root}" ]]; then
        docker_root="/var/lib/docker"
    fi

    if [[ ! -e "${stagefile}" ]]; then
        echo "Use this command when docker storage (at $docker_root) is corrupt. It will delete"
        echo "the docker storage, re-download docker images and re-create containers. Dashboard and apps"
        echo -e "will be unreachable for a while. No data will be lost.\n"
        echo -e "The server may have to be rebooted twice for this. If so, re-run this command after every reboot.\n"

        read -r -p "Do you want to proceed? [y/N] " choice
        choice=${choice:-n}

        [[ ! $choice =~ ^[Yy]$ ]] && exit 1

        info "Stopping box and docker"
        # best effort: some of the units may already be stopped or broken
        systemctl stop -q box docker containerd docker.socket || true
        systemctl disable -q box docker containerd docker.socket || true

        echo -e "clearing_storage" > "${stagefile}" # init
    fi

    if grep -q "clearing_storage" "${stagefile}"; then
        info "Clearing docker storage at ${docker_root}"
        # ':?' aborts instead of expanding to '/*' should one of the variables ever be empty
        if ! rm -rf "${docker_root:?}/"* "${containerd_root:?}/"*; then
            echo -e "\nThe server has to be rebooted to clear the docker storage. After reboot,"
            echo -e "run 'cloudron-support --recreate-docker' again.\n"
            ask_reboot # never returns; the stage stays at clearing_storage for the next run
        fi
        echo -e "cleared_storage" > "${stagefile}"
    fi

    if grep -q "cleared_storage" "${stagefile}"; then
        info "Starting docker afresh at ${docker_root}"
        systemctl enable --now -q docker.socket docker containerd
        sleep 5 # give docker some time to initialize the storage directory
        download_docker_images
        echo -e "downloaded_images" > "${stagefile}"
        echo -e "\nThe server has to be rebooted for docker to initialize properly. After reboot,"
        echo -e "run 'cloudron-support --recreate-docker' again.\n" # else docker network is not completely functional
        ask_reboot # never returns
    fi

    if grep -q "downloaded_images" "${stagefile}"; then
        systemctl enable -q box
        do_recreate_containers
    fi

    rm "${stagefile}"
}
2024-10-17 12:41:33 +02:00
# Handler for --fix-docker-version: installs the docker and containerd versions that box's installer.sh
# declares ('readonly docker_version="..."' / 'readonly containerd_version="..."'), downgrading if needed.
# The packages are downloaded from download.docker.com for the running Ubuntu release (amd64 only).
# Exits:     1 if the versions cannot be determined, a download fails or apt fails
function fix_docker_version() {
    local -r installer="/home/yellowtent/box/scripts/installer.sh"
    local -r debs=(/tmp/containerd.deb /tmp/docker-ce-cli.deb /tmp/docker.deb)
    local ubuntu_codename ubuntu_version docker_version containerd_version pool_url

    ubuntu_codename=$(lsb_release -cs)
    ubuntu_version=$(lsb_release -rs)
    docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' "${installer}")"
    containerd_version="$(sed -ne 's/readonly containerd_version="\(.*\)"/\1/p' "${installer}")"

    # without this check an empty version results in bogus download urls
    if [[ -z "${docker_version}" || -z "${containerd_version}" ]]; then
        echo "Could not determine the docker/containerd version from ${installer}" >&2
        exit 1
    fi

    echo "downloading docker ${docker_version}"
    pool_url="https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64"

    # copied from installer.sh. -S makes curl print the reason despite -s
    if ! {
        curl --fail -sSL "${pool_url}/containerd.io_${containerd_version}_amd64.deb" -o /tmp/containerd.deb \
            && curl --fail -sSL "${pool_url}/docker-ce-cli_${docker_version}-1~ubuntu.${ubuntu_version}~${ubuntu_codename}_amd64.deb" -o /tmp/docker-ce-cli.deb \
            && curl --fail -sSL "${pool_url}/docker-ce_${docker_version}-1~ubuntu.${ubuntu_version}~${ubuntu_codename}_amd64.deb" -o /tmp/docker.deb
    }; then
        rm -f "${debs[@]}" # do not leave partial downloads behind
        echo "Could not download the docker packages" >&2
        exit 1
    fi

    echo "installing docker"
    if ! apt install -y --allow-downgrades "${debs[@]}"; then
        rm -f "${debs[@]}"
        echo "Could not install the docker packages" >&2
        exit 1
    fi

    rm -f "${debs[@]}"
}
2024-07-12 11:03:56 +02:00
# Handler for --patch: downloads a commit of the box repository as a patch, applies it to
# /home/yellowtent/box after confirmation and restarts box.
# Arguments: $1 - commit id
# Outputs:   the patch is stored as /tmp/<commit id>.patch so that it can be inspected before confirming
# Exits:     1 on download problems, when the response is not a patch, when the user declines or when
#            the patch does not apply cleanly
function apply_patch() {
    local commit_id="$1"
    local patch_file="/tmp/${commit_id}.patch"
    local headers_file choice

    headers_file=$(mktemp) # response headers, only needed for the content-type check below

    # gitlab will return 404 if it looks like a valid commit id but doesn't exist. it returns login page with invalid commit id
    if ! curl -s "https://git.cloudron.io/cloudron/box/-/commit/${commit_id}.patch" -D "${headers_file}" -o "${patch_file}"; then
        rm -f "${headers_file}"
        echo "Could not connect to git"
        exit 1
    fi

    # curl does not fail on a 404 or on the login page here, so a real patch is recognized by its content
    # type. header names are case-insensitive ("Content-Type" with HTTP/1.1, "content-type" with HTTP/2)
    if ! grep -qi "content-type: text/plain" "${headers_file}"; then
        rm -f "${headers_file}"
        echo "Not a valid commit"
        exit 1
    fi
    rm -f "${headers_file}"

    echo "This will apply ${commit_id} (${patch_file}) from git and restart the box code."
    warn "Do not proceed unless you know what you are doing."

    read -r -p "Do you want to apply the patch? [y/N] " choice
    choice=${choice:-n}

    [[ ! $choice =~ ^[Yy]$ ]] && exit 1

    # try first without touching any file, so that a half-applied patch cannot happen
    if ! patch --force --dry-run -d /home/yellowtent/box -p1 -i "${patch_file}"; then
        echo "Patch does not apply cleanly"
        exit 1
    fi

    patch -d /home/yellowtent/box -p1 -i "${patch_file}"
    systemctl restart box

    echo "Patch applied"
}
2023-12-13 16:43:12 +01:00
check_disk_space

# parse the command line. every option runs its handler and exits; without an option (or with --help)
# the help message is printed.
# "enable-ssh" and "admin-login" are legacy aliases. they have to be listed here as well, otherwise getopt
# rejects them as unknown before the case statement below is ever reached.
args=$(getopt -o "" -l "admin-login,disable-dnssec,enable-ssh,enable-remote-support,disable-remote-support,help,owner-login,patch:,recreate-containers,recreate-docker,fix-docker-version,send-diagnostics,unbound-use-external-dns,troubleshoot" -n "$0" -- "$@")
eval set -- "${args}"

while true; do
    case "$1" in
    --enable-ssh)
        # legacy name, fall through
        ;&
    --enable-remote-support) enable_remote_support; exit 0;;
    --disable-remote-support) disable_remote_support; exit 0;;
    --admin-login)
        # legacy name, fall through
        ;&
    --owner-login) owner_login; exit 0;;
    --send-diagnostics) send_diagnostics; exit 0;;
    --troubleshoot) troubleshoot; exit 0;;
    --disable-dnssec) disable_dnssec; exit 0;;
    --unbound-use-external-dns) unbound_use_external_dns; exit 0;;
    --recreate-containers) recreate_containers; exit 0;;
    --recreate-docker) recreate_docker; exit 0;;
    --fix-docker-version) fix_docker_version; exit 0;;
    --patch) apply_patch "$2"; exit 0;;
    --help) break;; # help is printed below
    --) break;; # no option given
    *) echo "Unknown option $1"; exit 1;;
    esac
done

echo -e "${HELP_MESSAGE}"