HDFS Distributed File System 3-2: Using a Shell Script to Periodically Collect Data into HDFS

1. Preparation
Create the following directories (a command sketch follows the list):
/export/data/logs/log
/export/data/logs/toupload
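
Both directories can be created in one step; a minimal sketch, assuming they do not exist yet:

mkdir -p /export/data/logs/log /export/data/logs/toupload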
2. Create upload2HDFS.sh in the /export/data/logs directory

Its contents are as follows:
#!/bin/bash
export JAVA_HOME=/export/servers/jdk
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/export/servers/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
#Log storage directory (note the trailing /)
log_src_dir=/export/data/logs/log/
#Staging directory for files awaiting upload
log_toupload_dir=/export/data/logs/toupload/
#Set the date (yesterday), e.g. 2023_01_03
date1=`date -d last-day +%Y_%m_%d`
#Root path on HDFS for the uploaded log files
hdfs_root_dir=/data/clicklog/$date1/
#Print environment variable info
echo "envs:hadoop_home:$HADOOP_HOME"
#Scan the log directory and check whether any files need uploading
echo "log_src_dir:"$log_src_dir
ls $log_src_dir | while read fileName
do
        if [[ "$fileName" == access.log.* ]]; then
                date=`date +%Y_%m_%d_%H_%M_%S`
                echo "moving $log_src_dir$fileName to $log_toupload_dir"xxxxx_click_log_$fileName"$date"
                mv $log_src_dir$fileName $log_toupload_dir"xxxxx_click_log_$fileName"$date
                echo $log_toupload_dir"xxxxx_click_log_$fileName"$date >> $log_toupload_dir"willDoing."$date
        fi
done
#Find the willDoing list files, skipping batches already marked _COPY_ or _DONE_
ls $log_toupload_dir | grep will | grep -v "_COPY_" | grep -v "_DONE_" | while read line
do
        echo "toupload is in file:"$line
        mv $log_toupload_dir$line $log_toupload_dir$line"_COPY_"
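        #the pipe runs the inner loop in a subshell, so its "line" variable does not overwrite the outer $line used after the loop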
        cat $log_toupload_dir$line"_COPY_" | while read line
        do
                echo "puting...$line to hdfs path....$hdfs_root_dir"
                hadoop fs -mkdir -p $hdfs_root_dir
                hadoop fs -put $line $hdfs_root_dir
        done
        mv $log_toupload_dir$line"_COPY_" $log_toupload_dir$line"_DONE_"
done
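
After one run, the staging directory records the script's bookkeeping: each source file is renamed with a timestamp suffix, and each batch list file moves from willDoing.<date> through _COPY_ to _DONE_. A hypothetical listing of /export/data/logs/toupload (timestamps are illustrative):

xxxxx_click_log_access.log.12023_01_04_10_15_30
xxxxx_click_log_access.log.22023_01_04_10_15_30
willDoing.2023_01_04_10_15_30_DONE_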
3. In the directory /export/data/logs/log, create files
such as access.log.1 and access.log.2 and add some test content (a sketch follows).
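
A minimal way to create such test files, assuming any text content will do:

echo "click record 1" > /export/data/logs/log/access.log.1
echo "click record 2" > /export/data/logs/log/access.log.2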
4. Run: sh upload2HDFS.sh
The result can be checked in the HDFS web UI, or from the command line as sketched below.
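
A command-line check of the upload, using the same date expression as the script:

hadoop fs -ls /data/clicklog/`date -d last-day +%Y_%m_%d`/

To make the collection truly periodic, the script can be scheduled with cron; since it exports its own JAVA_HOME and HADOOP_HOME, it works in cron's sparse environment. A hypothetical crontab entry (the log file name is an assumption), with the script at /export/data/logs/upload2HDFS.sh as created in step 2:

#run the upload at one minute past every hour and append output to a log
1 * * * * /bin/bash /export/data/logs/upload2HDFS.sh >> /export/data/logs/upload2HDFS.cron.log 2>&1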

Below is the pure shell version of upload2HDFS (the same logic without comments):

#!/bin/bash
export JAVA_HOME=/export/servers/jdk
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export HADOOP_HOME=/export/servers/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

log_src_dir=/export/data/logs/log/
log_toupload_dir=/export/data/logs/toupload/

date1=`date -d last-day +%Y_%m_%d`
hdfs_root_dir=/data/clicklog/$date1/
echo "envs:hadoop_home:$HADOOP_HOME"
echo "log_src_dir:"$log_src_dir

ls $log_src_dir | while read fileName
do
if [[ "$fileName"==access.log.* ]]; then
        date=`date +%Y_%m_%d_%H_%M_%S`
        echo "moving $log_src_dir$fileName to $log_toupload_dir"xxxxx_click_log_$fileName$date
fi
done

ls $log_toupload_dir | grep will | grep -v "_COPY_" |grep -v "_DONE_" | while read line
do
echo "toupload is in file:"$line
mv $log_toupload_dir$line $log_toupload_dir$line"_COPY_"
cat $log_toupload_dir$line"_COPY_" | while read line
do
        echo "putting ...$line to hdfs path...$hdfs_root_dir"
        hadoop fs -mkdir -p $hdfs_root_dir
        hadoop fs -put $line $hdfs_root_dir
done
mv $log_toupload_dir$line"_COPY_" $log_toupload_dir$line"_DONE_"
done
