

Lab Answers for "Big Data Technology Principles and Applications (3rd Edition)" by Lin Ziyu: Lab 2, Getting Familiar with Common HDFS Operations

1. Program each of the following functions, and accomplish the same task with the shell commands provided by Hadoop;

1.1 Upload any text file to HDFS; if the specified file already exists in HDFS, let the user choose whether to append to the end of the original file or overwrite it;

shell

# Check whether the file exists
hdfs dfs -test -e /hdfstestfile.txt
# Check the result: 0 means it exists, 1 means it does not
echo $?
# If the file already exists, append to the end of it
hdfs dfs -appendToFile localtestfile.txt /hdfstestfile.txt
# If the file already exists, overwrite it
hdfs dfs -copyFromLocal -f localtestfile.txt /hdfstestfile.txt

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class AddFile_0101 {
    /**
     * Check whether a path exists
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Copy a local file to the given path,
     * overwriting the destination if it already exists
     */
    public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path localPath = new Path(localFilePath);
        Path remotePath = new Path(remoteFilePath);
        // The first argument of fs.copyFromLocalFile controls whether the source is deleted,
        // the second whether the destination is overwritten
        fs.copyFromLocalFile(false, true, localPath, remotePath);
        fs.close();
    }

    /**
     * Append the contents of a local file to a remote file
     */
    public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        // Open an input stream over the local file
        FileInputStream in = new FileInputStream(localFilePath);
        // Open an output stream that appends to the end of the remote file
        FSDataOutputStream out = fs.append(remotePath);
        // Copy the bytes across
        byte[] data = new byte[1024];
        int read = -1;
        while ((read = in.read(data)) > 0) {
            out.write(data, 0, read);
        }
        out.close();
        in.close();
        fs.close();
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String localFilePath = "/text.txt";   // local path
        String remoteFilePath = "/text.txt";  // HDFS path
        // String choice = "append";   // append if the file exists
        String choice = "overwrite";   // overwrite if the file exists
        try {
            // Check whether the file exists
            boolean fileExists = false;
            if (AddFile_0101.test(conf, remoteFilePath)) {
                fileExists = true;
                System.out.println(remoteFilePath + " already exists.");
            } else {
                System.out.println(remoteFilePath + " does not exist.");
            }
            // Process accordingly
            if (!fileExists) {
                // The file does not exist: upload it
                AddFile_0101.copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " uploaded to " + remoteFilePath);
            } else if (choice.equals("overwrite")) {
                AddFile_0101.copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " overwrote " + remoteFilePath);
            } else if (choice.equals("append")) {
                AddFile_0101.appendToFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " appended to " + remoteFilePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
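Note: on a pseudo-distributed cluster with a single DataNode, fs.append can fail with a "failed to replace a bad datanode" error, because the client tries to rebuild the write pipeline with DataNodes that do not exist. A minimal sketch of the client-side settings commonly used to work around this (an assumption for single-node setups; these lines would go in main before the append call):

// Workaround for append on a single-DataNode cluster: never try to
// replace a failed DataNode in the write pipeline.
conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
conf.setBoolean("dfs.client.block.write.replace-datanode-on-failure.enable", true);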

1.2 Download a specified file from HDFS; if a local file with the same name already exists, automatically rename the downloaded file;

shell

# If the local file already exists, download under a new name
if $(hdfs dfs -test -e file:///test.txt);
then $(hdfs dfs -copyToLocal test.txt /test2.txt);
else $(hdfs dfs -copyToLocal test.txt /test.txt);
fi

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0102 {
    /**
     * Download a file to the local filesystem.
     * If the local path already exists, automatically rename the downloaded file.
     */
    public static void copyToLocal(Configuration conf, String remoteFilePath, String localFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        File f = new File(localFilePath);
        // If the local file name is taken, pick a new name automatically
        if (f.exists()) {
            System.out.println(localFilePath + " already exists.");
            Integer i = 0;
            while (true) {
                f = new File(localFilePath + "_" + i.toString());
                if (!f.exists()) {
                    localFilePath = localFilePath + "_" + i.toString();
                    break;
                }
                i++;   // try the next suffix
            }
            System.out.println("The file will be renamed to: " + localFilePath);
        }
        // Download the file
        Path localPath = new Path(localFilePath);
        fs.copyToLocalFile(remotePath, localPath);
        fs.close();
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String localFilePath = "/home/text.txt";
        String remoteFilePath = "/text.txt";
        try {
            HDFSApi_0102.copyToLocal(conf, remoteFilePath, localFilePath);
            System.out.println("Download complete");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

1.3 Print the contents of a specified HDFS file to the terminal;

shell

hdfs dfs -cat /test.txt

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0103 {
    /**
     * Print the contents of a file
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader d = new BufferedReader(new InputStreamReader(in));
        String line = null;
        while ((line = d.readLine()) != null) {
            System.out.println(line);
        }
        d.close();
        in.close();
        fs.close();
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/text.txt";
        try {
            System.out.println("Reading file: " + remoteFilePath);
            HDFSApi_0103.cat(conf, remoteFilePath);
            System.out.println("\nDone reading!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
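As a variant, the same file can be streamed straight to standard output with org.apache.hadoop.io.IOUtils instead of reading line by line; a minimal sketch (assuming the imports above plus org.apache.hadoop.io.IOUtils):

/**
 * Variant of cat: copy the raw stream to stdout with IOUtils.
 */
public static void catBytes(Configuration conf, String remoteFilePath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    try (FSDataInputStream in = fs.open(new Path(remoteFilePath))) {
        // copyBytes(in, out, bufferSize, closeStreams)
        IOUtils.copyBytes(in, System.out, 4096, false);
    }
    fs.close();
}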

1.4 Show the read/write permissions, size, creation time, path, and other information of a specified HDFS file;

shell

hdfs dfs -ls -h /test.txt

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.text.SimpleDateFormat;

public class HDFSApi_0104 {
    /**
     * Show information about the specified file
     */
    public static void ls(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FileStatus[] fileStatuses = fs.listStatus(remotePath);
        for (FileStatus s : fileStatuses) {
            System.out.println("Path: " + s.getPath().toString());
            System.out.println("Permissions: " + s.getPermission().toString());
            System.out.println("Size: " + s.getLen());
            // getModificationTime() returns a timestamp; format it as a date
            Long timeStamp = s.getModificationTime();
            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String date = format.format(timeStamp);
            System.out.println("Time: " + date);
        }
        fs.close();
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/text.txt";
        try {
            System.out.println("Reading file info: " + remoteFilePath);
            HDFSApi_0104.ls(conf, remoteFilePath);
            System.out.println("\nDone");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
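Since 1.4 targets a single file, FileSystem.getFileStatus is the more direct call than listStatus; a sketch of the equivalent loop body inside ls above (same fields, one FileStatus object):

// Variant: fetch the status of a single file directly.
FileStatus s = fs.getFileStatus(new Path(remoteFilePath));
System.out.println("Path: " + s.getPath().toString());
System.out.println("Permissions: " + s.getPermission().toString());
System.out.println("Size: " + s.getLen());
System.out.println("Time: " + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(s.getModificationTime()));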

1.5 Given a directory in HDFS, print the read/write permissions, size, creation time, path, and other information of every file in it; if an entry is a directory, recursively print the information of all files under it;

shell

hdfs dfs -ls -R -h /hadoop

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.text.SimpleDateFormat;

public class HDFSApi_0105 {
    /**
     * Show information about every file under the given directory
     */
    public static void lsDir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        // Recursively list all files under the directory
        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(dirPath, true);
        // Print each file's information
        while (remoteIterator.hasNext()) {
            FileStatus s = remoteIterator.next();
            System.out.println("Path: " + s.getPath().toString());
            System.out.println("Permissions: " + s.getPermission().toString());
            System.out.println("Size: " + s.getLen());
            // getModificationTime() returns a timestamp; format it as a date
            Long timeStamp = s.getModificationTime();
            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String date = format.format(timeStamp);
            System.out.println("Time: " + date);
            System.out.println();
        }
        fs.close();
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteDir = "/user/hadoop";
        try {
            System.out.println("(Recursively) reading info for all files under directory: " + remoteDir);
            HDFSApi_0105.lsDir(conf, remoteDir);
            System.out.println("Done");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
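Note that fs.listFiles(dirPath, true) yields only files, so the directories themselves are never printed. If directory entries should be listed as well, manual recursion with listStatus is one option; a minimal sketch:

// Variant: recurse manually so directories are printed too.
public static void lsDirRec(FileSystem fs, Path dirPath) throws IOException {
    for (FileStatus s : fs.listStatus(dirPath)) {
        System.out.println((s.isDirectory() ? "dir:  " : "file: ") + s.getPath());
        if (s.isDirectory()) {
            lsDirRec(fs, s.getPath());   // descend into the subdirectory
        }
    }
}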

1.6 Given the path of a file inside HDFS, create and delete that file; if the directory containing the file does not exist, automatically create the directory;

shell

if $(hdfs dfs -test -d dir1/dir2);
then $(hdfs dfs -touchz dir1/dir2/filename);
else $(hdfs dfs -mkdir -p dir1/dir2 && hdfs dfs -touchz dir1/dir2/filename);
fi
# Delete the file
hdfs dfs -rm dir1/dir2/filename

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0106 {
    /**
     * Check whether a path exists
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Create a directory
     */
    public static boolean mkdir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        boolean result = fs.mkdirs(dirPath);
        fs.close();
        return result;
    }

    /**
     * Create an empty file
     */
    public static void touchz(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataOutputStream outputStream = fs.create(remotePath);
        outputStream.close();
        fs.close();
    }

    /**
     * Delete a file
     */
    public static boolean rm(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        boolean result = fs.delete(remotePath, false);
        fs.close();
        return result;
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/input/text.txt";
        String remoteDir = "/user/hadoop/input";
        try {
            // If the path exists, delete it; otherwise create it
            if (HDFSApi_0106.test(conf, remoteFilePath)) {
                HDFSApi_0106.rm(conf, remoteFilePath);   // delete the file
                System.out.println("Deleted path: " + remoteFilePath);
            } else {
                if (!HDFSApi_0106.test(conf, remoteDir)) {
                    // The directory does not exist: create it
                    HDFSApi_0106.mkdir(conf, remoteDir);
                    System.out.println("Created directory: " + remoteDir);
                }
                HDFSApi_0106.touchz(conf, remoteFilePath);
                System.out.println("Created path: " + remoteFilePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
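The program above passes the parent directory in separately as remoteDir; it can instead be derived from the file path with Path.getParent(). A sketch of that variant (inside a helper that already holds a FileSystem fs):

// Variant: derive the missing parent directory from the file path.
Path remotePath = new Path(remoteFilePath);
Path parent = remotePath.getParent();
if (parent != null && !fs.exists(parent)) {
    fs.mkdirs(parent);   // creates all missing ancestor directories
}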

1.7 Given the path of an HDFS directory, create and delete that directory. When creating, automatically create any missing parent directories; when deleting, let the user specify whether the directory should still be deleted when it is not empty;

shell

# Create a directory (with any missing parents)
hdfs dfs -mkdir -p dir1/dir2
# Delete an empty directory
hdfs dfs -rmdir dir1/dir2
# Force-delete a directory, even if it is not empty
hdfs dfs -rm -R dir1/dir2

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0107 {
    /**
     * Check whether a path exists
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Check whether a directory is empty
     * true: empty, false: not empty
     */
    public static boolean isDirEmpty(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(dirPath, true);
        return !remoteIterator.hasNext();
    }

    /**
     * Create a directory
     */
    public static boolean mkdir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        boolean result = fs.mkdirs(dirPath);
        fs.close();
        return result;
    }

    /**
     * Delete a directory
     */
    public static boolean rmDir(Configuration conf, String remoteDir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path dirPath = new Path(remoteDir);
        // The second argument controls whether deletion is recursive
        boolean result = fs.delete(dirPath, true);
        fs.close();
        return result;
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteDir = "/user/hadoop/input";
        // Whether to force deletion of a non-empty directory
        Boolean forceDelete = false;
        try {
            // If the directory does not exist, create it; otherwise delete it
            if (!HDFSApi_0107.test(conf, remoteDir)) {
                HDFSApi_0107.mkdir(conf, remoteDir);
                System.out.println("Created directory: " + remoteDir);
            } else {
                if (HDFSApi_0107.isDirEmpty(conf, remoteDir) || forceDelete) {
                    HDFSApi_0107.rmDir(conf, remoteDir);
                    System.out.println("Deleted directory: " + remoteDir);
                } else {
                    // The directory is not empty
                    System.out.println("Directory not empty, not deleting: " + remoteDir);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

1.8 Append content to a specified HDFS file, letting the user choose whether it goes at the beginning or the end of the original file;

shell

# Append to the end of the file
hdfs dfs -appendToFile local.txt test.txt
# Append to the beginning instead: download the file, append its contents
# to the end of the local file, then re-upload the combined file
hdfs dfs -get test.txt
cat test.txt >> local.txt
hdfs dfs -copyFromLocal -f local.txt test.txt

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi_0108 {
    /**
     * Check whether a path exists
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Append a text string to a file
     */
    public static void appendContentToFile(Configuration conf, String content, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        /* Open an output stream that appends to the end of the file */
        FSDataOutputStream out = fs.append(remotePath);
        out.write(content.getBytes());
        out.close();
        fs.close();
    }

    /**
     * Append the contents of a local file to a remote file
     */
    public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        /* Open an input stream over the local file */
        FileInputStream in = new FileInputStream(localFilePath);
        /* Open an output stream that appends to the end of the remote file */
        FSDataOutputStream out = fs.append(remotePath);
        /* Copy the bytes across */
        byte[] data = new byte[1024];
        int read = -1;
        while ((read = in.read(data)) > 0) {
            out.write(data, 0, read);
        }
        out.close();
        in.close();
        fs.close();
    }

    /**
     * Move a file to the local filesystem,
     * deleting the source after the move
     */
    public static void moveToLocalFile(Configuration conf, String remoteFilePath, String localFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        Path localPath = new Path(localFilePath);
        fs.moveToLocalFile(remotePath, localPath);
    }

    /**
     * Create an empty file
     */
    public static void touchz(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataOutputStream outputStream = fs.create(remotePath);
        outputStream.close();
        fs.close();
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/text.txt";
        String content = "Newly appended content\n";
        String choice = "after";    // append to the end of the file
        // String choice = "before";    // prepend to the beginning of the file
        try {
            /* Check whether the file exists */
            if (!HDFSApi_0108.test(conf, remoteFilePath)) {
                System.out.println("File does not exist: " + remoteFilePath);
            } else {
                if (choice.equals("after")) {
                    HDFSApi_0108.appendContentToFile(conf, content, remoteFilePath);
                    System.out.println("Appended content to end of file " + remoteFilePath);
                } else if (choice.equals("before")) {
                    /* No API prepends directly, so first move the file to the local filesystem, */
                    /* then create a new HDFS file and append the pieces in order */
                    String localTmpPath = "/user/hadoop/tmp.txt";
                    HDFSApi_0108.moveToLocalFile(conf, remoteFilePath, localTmpPath);
                    // Create a new (empty) file
                    HDFSApi_0108.touchz(conf, remoteFilePath);
                    // Write the new content first
                    HDFSApi_0108.appendContentToFile(conf, content, remoteFilePath);
                    // Then write the original content after it
                    HDFSApi_0108.appendToFile(conf, localTmpPath, remoteFilePath);
                    System.out.println("Prepended content to beginning of file: " + remoteFilePath);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

1.9 Delete a specified file from HDFS;

shell

hdfs dfs -rm test.txt

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi {
    /**
     * Delete a file
     */
    public static boolean rm(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        boolean result = fs.delete(remotePath, false);
        fs.close();
        return result;
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "/user/hadoop/text.txt";
        try {
            if (HDFSApi.rm(conf, remoteFilePath)) {
                System.out.println("Deleted file: " + remoteFilePath);
            } else {
                System.out.println("Operation failed (file does not exist or deletion failed)");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

1.10 Move an HDFS file to a specified path;

shell

hdfs dfs -mv test.txt dir/test.txt

java

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

public class HDFSApi {
    /**
     * Move a file
     */
    public static boolean mv(Configuration conf, String remoteFilePath, String remoteToFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path srcPath = new Path(remoteFilePath);
        Path dstPath = new Path(remoteToFilePath);
        boolean result = fs.rename(srcPath, dstPath);
        fs.close();
        return result;
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:8020");
        String remoteFilePath = "hdfs:///user/hadoop/text.txt";    // source HDFS path
        String remoteToFilePath = "hdfs:///user/hadoop/new.txt";   // destination HDFS path
        try {
            if (HDFSApi.mv(conf, remoteFilePath, remoteToFilePath)) {
                System.out.println("Moved file " + remoteFilePath + " to " + remoteToFilePath);
            } else {
                System.out.println("Operation failed (source file does not exist or move failed)");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

2. Program a class "MyFSDataInputStream" that extends "org.apache.hadoop.fs.FSDataInputStream", with the following requirement: implement a method "readLine()" that reads the specified HDFS file line by line; it returns null once the end of the file is reached, and otherwise returns one line of text from the file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.*;

public class MyFSDataInputStream extends FSDataInputStream {
    public MyFSDataInputStream(InputStream in) {
        super(in);
    }

    /**
     * Read one line:
     * read one character at a time, stop at "\n", and return the line
     */
    public static String readline(BufferedReader br) throws IOException {
        char[] data = new char[1024];
        int read = -1;
        int off = 0;
        // Across calls, br continues from where the previous read stopped;
        // within this method, off restarts from 0 each time
        while ((read = br.read(data, off, 1)) != -1) {
            if (String.valueOf(data[off]).equals("\n")) {
                off += 1;
                break;
            }
            off += 1;
        }
        if (off > 0) {
            return String.valueOf(data, 0, off);
        } else {
            return null;
        }
    }

    /**
     * Print the contents of a file
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path remotePath = new Path(remoteFilePath);
        FSDataInputStream in = fs.open(remotePath);
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String line = null;
        while ((line = MyFSDataInputStream.readline(br)) != null) {
            System.out.println(line);
        }
        br.close();
        in.close();
        fs.close();
    }

    /**
     * Main
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        String remoteFilePath = "/user/hadoop/text.txt";
        try {
            MyFSDataInputStream.cat(conf, remoteFilePath);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
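The readline above works through a BufferedReader; a variant closer to the exercise wording is an instance method on MyFSDataInputStream itself, reading one byte at a time from the underlying stream. A sketch, assuming a single-byte text encoding and "\n" line endings:

/**
 * Instance-method variant: read bytes from this stream until '\n' or EOF.
 * Returns null once the end of the file is reached.
 */
public String readLine() throws IOException {
    StringBuilder sb = new StringBuilder();
    int b;
    while ((b = this.read()) != -1) {
        if (b == '\n') {
            return sb.toString();
        }
        sb.append((char) b);
    }
    return sb.length() > 0 ? sb.toString() : null;
}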

3. Print the contents of an HDFS file to the terminal using java.net.URL and FsUrlStreamHandlerFactory

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class FsUrl {
    static {
        // Register the hdfs:// protocol handler with java.net.URL;
        // setURLStreamHandlerFactory may be called at most once per JVM
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void cat(String remoteFilePath) {
        // try-with-resources closes the stream automatically
        try (InputStream in = new URL("hdfs", "localhost", 9000, remoteFilePath).openStream()) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        FsUrl.cat("/user/hadoop/text.txt");
    }
}

This post is reproduced from: https://blog.csdn.net/MarioPeng/article/details/125923560
Copyright belongs to the original author, MarioPeng; in case of infringement, please contact us for removal.
