#!/bin/bash

set -o errexit -o nounset -o pipefail

# this script requires root: refuse to continue for any other effective user
if (( EUID != 0 )); then
    echo "This script should be run as root. Run with sudo"
    exit 1
fi

# Colour codes are stored with literal backslash escapes; they only turn into
# terminal control sequences when printed with escape interpretation enabled.
declare -r RED='\033[31m'
declare -r GREEN='\033[32m'
declare -r YELLOW='\033[33m'
declare -r DONE='\033[m'

# Paste service that receives the diagnostics log
declare -r PASTEBIN="https://paste.cloudron.io"
# Section separator for the diagnostics log (also stored with literal escapes)
declare -r LINE="\n========================================================\n"
# Usage text printed for --help or when no action was requested
declare -r HELP_MESSAGE="
 Cloudron Support and Diagnostics Tool

 Options:
   --disable-dnssec          Disable DNSSEC
   --enable-remote-access    Enable SSH Remote Access for the Cloudron support team
   --send-diagnostics        Collects server diagnostics and uploads it to ${PASTEBIN}
   --troubleshoot            Dashboard down? Run tests to identify the potential problem
   --owner-login             Login as owner
   --use-external-dns        Forwards all DNS requests to Google (8.8.8.8) and Cloudflare (1.1.1.1) DNS servers
   --help                    Show this message
"

# Prints "[OK]" (in green), a tab and the message given as $1 to stdout.
# Backslash escapes inside the message are expanded.
function success() {
    local -r message="${1}"
    printf '%b\n' "[${GREEN}OK${DONE}]\t${message}"
}

# Prints the message given as $1 to stdout, indented by one tab so that it lines up
# with the text of the tagged status lines. Backslash escapes in the message are expanded.
function info() {
    local -r message="${1}"
    printf '%b\n' "\t${message}"
}

# Prints "[WARN]" (in yellow), a tab and the message given as $1 to stdout.
# Backslash escapes inside the message are expanded.
function warn() {
    local -r message="${1}"
    printf '%b\n' "[${YELLOW}WARN${DONE}]\t${message}"
}

# Prints "[FAIL]" (in red), a tab and the message given as $1 to stderr.
# Backslash escapes inside the message are expanded. Does not exit; callers decide that.
function fail() {
    local -r message="${1}"
    printf '%b\n' "[${RED}FAIL${DONE}]\t${message}" >&2
}

# Gives the Cloudron support team SSH access by adding their public key to the
# authorized_keys file of the cloudron-support user.
# The key is only appended (and permissions/ownership only adjusted) when it is not
# already present, so running this repeatedly does not duplicate the entry.
function enable_remote_access() {
    local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r ssh_user="cloudron-support"
    local -r keys_file="/home/cloudron-support/.ssh/authorized_keys"

    echo -n "Enabling Remote Access for the Cloudron support team..."
    mkdir -p "$(dirname "${keys_file}")"    # .ssh does not exist sometimes
    touch "${keys_file}"                    # grep and the append below need the file to exist
    # -F: the key is a literal string, not a regular expression ('.' and '+' must not be special)
    if ! grep -qF -- "${cloudron_support_public_key}" "${keys_file}"; then
        # leading newline in case the existing file does not end with one
        echo -e "\n${cloudron_support_public_key}" >> "${keys_file}"
        chmod 600 "${keys_file}"
        chown "${ssh_user}" "${keys_file}"
    fi

    echo "Done"
}

# Verifies that the mysql systemd unit is active, restarting it once when it is not.
# Exits the script with status 1 if the unit is still not active after the restart.
function check_host_mysql() {
    if systemctl is-active -q mysql; then
        success "MySQL is running"
        return
    fi

    info "MySQL is down. Trying to restart MySQL ..."

    systemctl restart mysql

    if systemctl is-active -q mysql; then
        success "MySQL is running"
        return
    fi

    fail "MySQL is still down, please investigate the error by inspecting /var/log/mysql/error.log"
    exit 1
}

# Verifies that the box systemd unit is active. When it is not, runs
# /home/yellowtent/box/setup/start.sh and then stops and starts the unit.
# Exits the script with status 1 if the unit is still not active afterwards.
function check_box() {
    if systemctl is-active -q box; then
        success "box is running"
        return
    fi

    info "box is down. re-running migration script and restarting it ..."

    /home/yellowtent/box/setup/start.sh
    # explicit stop + start: a restart sometimes doesn't restart, no idea
    systemctl stop box
    systemctl start box

    if systemctl is-active -q box; then
        success "box is running"
        return
    fi

    fail "box is still down, please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log"
    exit 1
}

# Stores a freshly generated password for the first active owner account in the
# 'ghosts_config' setting and prints the dashboard URL together with the credentials.
# Exits with status 1 (after an error message) when no owner account can be determined,
# no password can be generated, or the setting cannot be written.
function owner_login() {
    check_host_mysql >/dev/null

    # Declared separately from the assignments: 'local x=$(cmd)' always succeeds and
    # would hide a failing query.
    local owner_username owner_password dashboard_domain ghosts_config

    owner_username=$(mysql -NB -uroot -ppassword -e "SELECT username FROM box.users WHERE role='owner' AND username IS NOT NULL AND active=1 ORDER BY creationTime LIMIT 1" 2>/dev/null) || true
    if [[ -z "${owner_username}" ]]; then
        # without a username the ghost entry and the printed login would be useless
        fail "Could not find an active owner account in the database"
        exit 1
    fi

    owner_password=$(pwgen -1s 12) || true
    if [[ -z "${owner_password}" ]]; then
        fail "Could not generate a password. Is pwgen installed?"
        exit 1
    fi

    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
    ghosts_config="{\"${owner_username}\":\"${owner_password}\"}"

    if ! mysql -NB -uroot -ppassword -e "INSERT INTO box.settings (name, value) VALUES ('ghosts_config', '${ghosts_config}') ON DUPLICATE KEY UPDATE name='ghosts_config', value='${ghosts_config}'" 2>/dev/null; then
        fail "Could not store the one-time password in the database"
        exit 1
    fi

    echo "Login at https://my.${dashboard_domain} as ${owner_username} / ${owner_password} . This password may only be used once."
}

# Collects system, Cloudron, Docker, disk usage, service, log and network information
# into /tmp/cloudron-support.log, uploads that file to ${PASTEBIN} and prints the link
# that should be sent to support.
#
# Collection is best-effort: the script runs with 'set -e', so every probe is guarded.
# A stopped service, a missing file or a missing tool is recorded in the log instead of
# aborting the whole run (this tool is typically used when something is already broken).
# Exits with status 1 only when the upload itself fails.
function send_diagnostics() {
    local -r log="/tmp/cloudron-support.log"
    local cloudron_version dashboard_domain has_ipv6 paste_key

    echo -n "Generating Cloudron Support stats..."

    rm -rf "${log}"

    # stdout and stderr of everything inside the group are appended to the log file
    {
        echo -e "${LINE}Linux${LINE}"
        uname -nar || true

        echo -e "${LINE}Ubuntu${LINE}"
        lsb_release -a || true

        echo -e "${LINE}Cloudron${LINE}"
        cloudron_version=$(cat /home/yellowtent/box/VERSION) || true
        echo -e "Cloudron version: ${cloudron_version}"
        dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
        echo -e "Dashboard domain: ${dashboard_domain}"

        # docker and df can hang (dead daemon, stale mounts), hence the timeouts
        echo -e "${LINE}Docker${LINE}"
        if ! timeout --kill-after 10s 15s docker system info; then
            echo -e "Docker (system info) is not responding"
        fi

        if ! timeout --kill-after 10s 15s docker ps -a; then
            echo -e "Docker (ps) is not responding"
        fi

        echo -e "${LINE}Filesystem stats${LINE}"
        if ! timeout --kill-after 10s 15s df -h; then
            echo -e "df is not responding"
        fi

        echo -e "${LINE}Appsdata stats${LINE}"
        du -hcsL /home/yellowtent/appsdata/* || true

        echo -e "${LINE}Boxdata stats${LINE}"
        du -hcsL /home/yellowtent/boxdata/* || true

        echo -e "${LINE}Backup stats (possibly misleading)${LINE}"
        du -hcsL /var/backups/* || true

        echo -e "${LINE}System daemon status${LINE}"
        # 'systemctl status' exits non-zero as soon as one of the units is not running
        systemctl status --lines=100 box mysql unbound cloudron-syslog nginx collectd docker || true

        echo -e "${LINE}Box logs${LINE}"
        tail -n 100 /home/yellowtent/platformdata/logs/box.log || true

        echo -e "${LINE}Interface Info${LINE}"
        ip addr || true

        echo -e "${LINE}Firewall chains${LINE}"
        iptables -L || true
        if cat /proc/net/if_inet6 >/dev/null 2>&1; then
            has_ipv6="yes"
        else
            has_ipv6="no"
        fi
        echo -e "IPv6: ${has_ipv6}"
        if [[ "${has_ipv6}" == "yes" ]]; then
            ip6tables -L || true
        fi
    } >> "${log}" 2>&1

    echo "Done"

    echo -n "Uploading information..."
    # the paste service answers with a JSON document; its 'key' field identifies the paste
    if ! paste_key=$(curl -X POST "${PASTEBIN}/documents" --silent --data-binary "@${log}" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])"); then
        echo "Failed"
        fail "Could not upload the diagnostics. Please email the file ${log} to support@cloudron.io instead"
        exit 1
    fi
    echo "Done"

    echo -e "\nPlease email the following link to support@cloudron.io : ${PASTEBIN}/${paste_key}"
}

# Verifies that the unbound systemd unit is active and that it resolves cloudron.io.
# When the unit is down, the root trust anchor is refreshed and the unit restarted once.
# Exits the script with status 1 if unbound stays down or DNS resolution does not work.
function check_unbound() {
    local test_resolve

    if ! systemctl is-active -q unbound; then
        info "unbound is down. updating root anchor to see if it fixes it"
        # unbound-anchor exits with 1 when it actually updated the anchor; that must not
        # abort the script under 'set -e'
        unbound-anchor -a /var/lib/unbound/root.key || true
        systemctl restart unbound

        if ! systemctl is-active -q unbound; then
            fail "unbound is still down, please investigate the error using 'journalctl -u unbound'"
            exit 1
        fi
    fi

    # A non-zero dig exit status (e.g. no reply from the server) counts as a failed lookup
    # just like an empty answer does.
    if ! test_resolve=$(dig cloudron.io @127.0.0.1 +short) || [[ -z "${test_resolve}" ]]; then
        fail "DNS is not resolving, maybe try forwarding all DNS requests using the --use-external-dns option"
        exit 1
    fi

    success "unbound is running"
}

# Checks the TLS certificate that the dashboard's nginx config points to.
# The certificate path is read from the 'ssl_certificate' directive of the dashboard
# config; the certificate counts as expired when it is not valid for another 100 seconds.
# Exits the script with status 1 when the config or certificate cannot be found, or when
# the certificate has expired (then the log of the last cert renewal task is pointed out).
function check_dashboard_cert() {
    local dashboard_domain nginx_conf_file cert_file cert_expiry_date task_id

    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
    nginx_conf_file="/home/yellowtent/platformdata/nginx/applications/dashboard/my.${dashboard_domain}.conf"

    # Without these guards a missing config or certificate would be misreported as "expired"
    if [[ ! -r "${nginx_conf_file}" ]]; then
        fail "Could not read the dashboard nginx config ${nginx_conf_file}"
        exit 1
    fi

    cert_file=$(sed -n -e 's/.*ssl_certificate [[:space:]]\+\(.*\);/\1/p' "${nginx_conf_file}") || true
    if [[ -z "${cert_file}" || ! -f "${cert_file}" ]]; then
        fail "Could not find the certificate file referenced in ${nginx_conf_file}"
        exit 1
    fi

    if ! openssl x509 -checkend 100 -noout -in "${cert_file}" >/dev/null 2>&1; then
        cert_expiry_date=$(openssl x509 -enddate -noout -in "${cert_file}" | sed -e 's/notAfter=//') || true
        fail "Certificate has expired. Certificate expired at ${cert_expiry_date}"

        task_id=$(mysql -NB -uroot -ppassword -e "SELECT id FROM box.tasks WHERE type='checkCerts' ORDER BY id DESC LIMIT 1" 2>/dev/null) || true
        echo -e "\tPlease check /home/yellowtent/platformdata/logs/tasks/${task_id}.log for last cert renewal logs"
        echo -e "\tCommon issues include expiry of domain's API key OR incoming http port 80 not being open"
        exit 1
    fi
}

# Verifies that the nginx systemd unit is active. When it is not, every file in the
# dashboard config directory except the config of the current dashboard domain is removed
# and nginx is restarted. Exits the script with status 1 if nginx is still down afterwards.
function check_nginx() {
    local -r dashboard_conf_dir="/home/yellowtent/platformdata/nginx/applications/dashboard"
    local dashboard_domain

    if ! systemctl is-active -q nginx; then
        fail "nginx is down. Removing extraneous dashboard domain configs ..."

        dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
        if [[ -z "${dashboard_domain}" ]]; then
            # with an empty domain the exclusion below would match nothing and the cleanup
            # would delete every config, including the valid one
            warn "Could not determine the dashboard domain, not removing any nginx configs"
        elif [[ -d "${dashboard_conf_dir}" ]]; then
            # operate on the directory path instead of 'cd' so the working directory of
            # the script stays untouched
            find "${dashboard_conf_dir}" ! -name "my.${dashboard_domain}.conf" -type f -exec rm -f {} +
        fi
        systemctl restart nginx

        if ! systemctl is-active -q nginx; then
            fail "nginx is still down, please investigate the error by inspecting /var/log/nginx/error.log"
            exit 1
        fi
    fi

    success "nginx is running"
}

# Confirms that https works properly without any proxy (cloudflare) involved: the
# dashboard hostname is pinned to 127.0.0.1 for the request, so this server answers it.
# Exits the script with status 1 when the page cannot be loaded.
function check_dashboard_site_loopback() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r dashboard_host="my.${dashboard_domain}"

    if curl --fail -s --resolve "${dashboard_host}:443:127.0.0.1" "https://${dashboard_host}" >/dev/null; then
        return 0
    fi

    fail "Could not load dashboard website with loopback check"
    exit 1
}

# Verifies that the installed node version is the one pinned as 'node_version' in the
# box installer script. On a mismatch - including a node binary that is missing or does
# not run - prints the commands that fix the symlinks and exits the script with status 1.
function check_node() {
    local -r installer="/home/yellowtent/box/scripts/installer.sh"
    local expected_node_version current_node_version

    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' "${installer}")" || true
    if [[ -z "${expected_node_version}" ]]; then
        # the fix instructions below would be useless without the expected version
        fail "Could not determine the expected node version from ${installer}"
        exit 1
    fi

    # strip trailing newline and 'v' prefix. '|| true': a broken node must end up in the
    # mismatch branch below instead of aborting the script via 'set -e' and 'pipefail'
    current_node_version="$(node --version | tr -d '\n' | cut -c2-)" || true

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        echo "    systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}

# Verifies that the docker systemd unit is active, restarting it once when it is not.
# Exits the script with status 1 if the unit is still not active after the restart.
function check_docker() {
    if ! systemctl is-active -q docker; then
        info "Docker is down. Trying to restart docker ..."
        systemctl restart docker

        if ! systemctl is-active -q docker; then
            fail "Docker is still down, please investigate the error using 'journalctl -u docker'"
            exit 1
        fi
    fi

    success "docker is running"
}

# Loads the dashboard through its public domain name and verifies that the response
# really is the Cloudron dashboard. Exits the script with status 1 (with a hint that
# depends on the DNS provider of the domain) when the page cannot be loaded, or when
# something other than the dashboard answers.
function check_dashboard_site_domain() {
    local dashboard_domain domain_provider output

    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
    domain_provider=$(mysql -NB -uroot -ppassword -e "SELECT provider FROM box.domains WHERE domain='${dashboard_domain}'" 2>/dev/null) || true

    # TODO: check ipv4 and ipv6
    if ! output=$(curl --fail -s "https://my.${dashboard_domain}"); then
        fail "Could not load dashboard domain."
        if [[ "${domain_provider}" == "cloudflare" ]]; then
            echo "Maybe cloudflare proxying is not working. Delete the domain in Cloudflare dashboard and re-add it. This sometimes re-establishes the proxying"
        else
            echo "Hairpin NAT is not working. Please check if your router supports it"
        fi
        exit 1
    fi

    # Whitespace runs are collapsed first so the marker is also found when it is wrapped
    # across lines. grep deliberately runs without -q: with -q it would stop reading at the
    # first match and the writer could die of SIGPIPE, which 'pipefail' reports as a failure.
    if ! tr -s '[:space:]' ' ' <<< "${output}" | grep -F "Cloudron Dashboard" >/dev/null; then
        fail "https://my.${dashboard_domain} is not the dashboard domain. Check if DNS is set properly to this server"
        host "my.${dashboard_domain}" 127.0.0.1 || true # could also result in cloudflare
        exit 1
    fi

    success "Dashboard is reachable via domain name"
}

# Looks up the expiry date of the dashboard domain via whois and compares it with the
# current time. Exits the script with status 1 when the domain appears to be expired.
# The check is skipped (return 0, with a message) when whois is not installed, when whois
# reports no expiry date, or when the reported date cannot be parsed.
function check_expired_domain() {
    local dashboard_domain expdate expdate_secs curdate_secs

    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true

    if ! command -v whois &> /dev/null; then
        info "Domain ${dashboard_domain} expiry check skipped because whois is not installed. Run 'apt install whois' to check"
        return 0
    fi

    # under 'pipefail' the pipeline fails when grep matches nothing; the empty check covers that
    expdate=$(whois "${dashboard_domain}" | grep -E -i 'Expiration Date:|Expires on|Expiry Date:' | head -1 | awk '{print $NF}') || true
    if [[ -z "${expdate}" ]]; then
        warn "Domain ${dashboard_domain} expiry check skipped because whois does not have this information"
        return 0
    fi

    # An unparseable date must not fall through to the comparison, where an empty value
    # would make the domain look valid.
    if ! expdate_secs=$(date -d"${expdate}" +%s 2>/dev/null) || [[ -z "${expdate_secs}" ]]; then
        warn "Domain ${dashboard_domain} expiry check skipped because the expiry date '${expdate}' could not be parsed"
        return 0
    fi
    curdate_secs="$(date +%s)"

    if (( curdate_secs > expdate_secs )); then
        fail "Domain ${dashboard_domain} appears to be expired"
        exit 1
    fi

    success "Domain ${dashboard_domain} is valid and has not expired"
}

# Writes an unbound config snippet that forwards the root zone (i.e. every query) to
# 1.1.1.1 and 8.8.8.8, then restarts unbound so that it takes effect.
function use_external_dns() {
    local -r forward_conf="/etc/unbound/unbound.conf.d/forward-everything.conf"

    info "To remove the forwarding, please delete ${forward_conf} and 'systemctl restart unbound'"

    # one argument per line of the generated file
    printf '%s\n' \
        'forward-zone:' \
        '    name: "."' \
        '    forward-addr: 1.1.1.1' \
        '    forward-addr: 8.8.8.8' > "${forward_conf}"

    systemctl restart unbound

    success "Forwarded all DNS requests to Google (8.8.8.8) & Cloudflare DNS (1.1.1.1)"
}

# Writes an unbound config snippet that sets 'val-permissive-mode: yes', then restarts
# unbound so that it takes effect.
function disable_dnssec() {
    local -r dnssec_conf="/etc/unbound/unbound.conf.d/disable-dnssec.conf"

    warn "To reenable DNSSEC, please delete ${dnssec_conf} and 'systemctl restart unbound'"

    # one argument per line of the generated file
    printf '%s\n' \
        'server:' \
        '  val-permissive-mode: yes' > "${dnssec_conf}"

    systemctl restart unbound

    success "DNSSEC Disabled"
}

# Runs all health checks one after the other, in a fixed order. Each check reports its
# own result; a failing check terminates the script, so later checks only run when the
# earlier ones passed.
function troubleshoot() {
    # note: disk space test has already been run globally
    local -ra checks=(
        check_node
        check_docker
        check_host_mysql
        check_nginx                     # requires mysql to be checked
        check_dashboard_site_loopback   # checks website via loopback
        check_box
        check_unbound
        check_dashboard_cert
        check_dashboard_site_domain     # check website via domain name
        check_expired_domain
    )
    local check

    for check in "${checks[@]}"; do
        "${check}"
    done
}

function check_disk_space() {
    # check if at least 10mb root partition space is available
    if [[ "`df --output="avail" / | sed -n 2p`" -lt "10240" ]]; then
        echo "No more space left on /"
        echo "This is likely the root case of the issue. Free up some space and also check other partitions below:"
        echo ""
        df -h
        echo ""
        echo "To recover from a full disk, follow the guide at https://docs.cloudron.io/troubleshooting/#recovery-after-disk-full"
        exit 1
    fi

    # check for at least 5mb free /tmp space for the log file
    if [[ "`df --output="avail" /tmp | sed -n 2p`" -lt "5120" ]]; then
        echo "Not enough space left on /tmp"
        echo "Free up some space first by deleting files from /tmp"
        exit 1
    fi
}

check_disk_space

# All options are long options without arguments. getopt rejects anything else and
# returns the normalised option list, which then replaces the positional parameters.
args=$(getopt -o "" -l "admin-login,disable-dnssec,enable-ssh,enable-remote-access,help,owner-login,send-diagnostics,use-external-dns,troubleshoot" -n "$0" -- "$@")
eval set -- "${args}"

# Every action ends the script, so only the first option is ever acted upon.
# --help and the end-of-options marker leave the loop and fall through to the usage text.
while true; do
    case "$1" in
    --enable-ssh|--enable-remote-access)    # both spellings trigger the same action
        enable_remote_access
        exit 0
        ;;
    --admin-login|--owner-login)            # both spellings trigger the same action
        owner_login
        exit 0
        ;;
    --send-diagnostics)
        send_diagnostics
        exit 0
        ;;
    --troubleshoot)
        troubleshoot
        exit 0
        ;;
    --disable-dnssec)
        disable_dnssec
        exit 0
        ;;
    --use-external-dns)
        use_external_dns
        exit 0
        ;;
    --help|--)
        break
        ;;
    *)
        echo "Unknown option $1"
        exit 1
        ;;
    esac
done

echo -e "${HELP_MESSAGE}"
