#!/bin/sh

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Create a Hadoop AMI. Runs on the EC2 instance.

# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh

# Remove environment script since it contains sensitive information
rm -f "$bin"/hadoop-ec2-env.sh

# Install Java
echo "Downloading and installing java binary."
cd /usr/local
wget -nv -O java.bin $JAVA_BINARY_URL
sh java.bin
rm -f java.bin

# Install tools
echo "Installing rpms."
yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php
yum -y clean all

# Install Hadoop
echo "Installing Hadoop $HADOOP_VERSION."
cd /usr/local
wget -nv http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
[ ! -f hadoop-$HADOOP_VERSION.tar.gz ] && wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
tar xzf hadoop-$HADOOP_VERSION.tar.gz
rm -f hadoop-$HADOOP_VERSION.tar.gz

# Configure Hadoop
sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
       -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
       -e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
       -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \
      /usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh

# Run user data as script on instance startup
chmod +x /etc/init.d/ec2-run-user-data
echo "/etc/init.d/ec2-run-user-data" >> /etc/rc.d/rc.local

# Setup root user bash environment
echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile
echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile
echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile

# Configure networking.
# Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.)
rm -f /root/.ssh/authorized_keys
# Ensure logging in to new hosts is seamless.
echo '    StrictHostKeyChecking no' >> /etc/ssh/ssh_config

# Bundle and upload image
cd ~root
# Don't need to delete .bash_history since it isn't written until exit.
df -h
ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem -u $AWS_ACCOUNT_ID -s 3072 -p hadoop-$HADOOP_VERSION-$ARCH -r $ARCH

ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY

# End
echo Done
