HBase administration using the Java API, with code examples

I have not given a formal introduction to HBase, but this post will help those who have already set up an active HBase installation. I will be dealing with the administrative work that can be done on HBase using the Java API, which is vast and easy to use. I have explained the code wherever I found it necessary, but this
post is by no means complete. As usual, I have provided the full code at the end. Cheers. 🙂

If you want to follow along, you had better import all of this; if you are using an IDE like Eclipse, you'll follow along just fine as it fixes up your imports automatically. The only thing you need to do is set the classpath to include all the jar files from the Hadoop and/or HBase installation, especially the hadoop-0.*.*-core.jar and the jar files inside the lib folder. I'll put up another post on that later.


import java.io.IOException;
import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;

1. Creating a table in HBase

    public void createTable (String tablename, String familyname) throws IOException {

        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);

        HTableDescriptor tabledescriptor = new HTableDescriptor(Bytes.toBytes(tablename));

        tabledescriptor.addFamily(new HColumnDescriptor (familyname));

        admin.createTable(tabledescriptor);

    }
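
For example, with these methods collected in the HbaseAdmin class given at the end of this post, creating a table with a single column family might look like this ("testtable" and "cf1" are just placeholder names):

    HbaseAdmin admin = new HbaseAdmin();
    admin.createTable("testtable", "cf1");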

2. Adding a column family to an existing table

    public void addColumn (String tablename, String columnname) throws IOException{

        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.addColumn(tablename, new HColumnDescriptor (columnname));
        System.out.println("Added column : " + columnname + " to table " + tablename);
    }

3. Deleting a column family from an existing table

    public void delColumn (String tablename, String columnname) throws IOException{

        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.deleteColumn(tablename, columnname);
        System.out.println("Deleted column : " + columnname + " from table " + tablename);
    }

4. Check if your HBase cluster is running properly

    public static void checkIfRunning() throws MasterNotRunningException, ZooKeeperConnectionException{
        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        //Check if HBase is running
        try{
            HBaseAdmin.checkHBaseAvailable(conf);
        }catch(Exception e){
            System.err.println("Exception at " + e);
            System.exit(1);
        }
    }

5. Major compaction

    public void majorCompact (String mytable) throws IOException{

        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);

        try{
            //majorCompact is asynchronous; this only requests the compaction.
            admin.majorCompact(mytable);
            System.out.println("Major compaction requested for " + mytable);
        }catch(Exception e){
            System.out.println(e);
        }
    }

6. Minor compaction

    public void minorcompact(String trname) throws IOException, InterruptedException{
        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.compact(trname);
    }

7. Get the cluster status.

    public ClusterStatus getclusterstatus () throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        return admin.getClusterStatus();
    }

8. Get all the cluster details.

    public void printClusterDetails() throws IOException{
        ClusterStatus status = getclusterstatus();

        Collection<HServerInfo> serverinfo = status.getServerInfo();
        for (HServerInfo s : serverinfo){
            System.out.println("Servername " + s.getServerName());
            System.out.println("Hostname " + s.getHostname());
            System.out.println("Hostname:Port " + s.getHostnamePort());
            System.out.println("Info port " + s.getInfoPort());
            System.out.println("Server load " + s.getLoad().toString());
            System.out.println();
        }

        String version = status.getHBaseVersion();
        System.out.println("Version " + version);

        int regioncounts = status.getRegionsCount();
        System.out.println("Region counts : " + regioncounts);

        int servers = status.getServers();
        System.out.println("Servers : " + servers);

        double averageload = status.getAverageLoad();
        System.out.println("Average load : " + averageload);

        int deadservers = status.getDeadServers();
        System.out.println("Dead servers : " + deadservers);

        Collection<String> servernames = status.getDeadServerNames();
        for (String s : servernames){
            System.out.println("Dead server name " + s);
        }
    }

9. Disable a table.

    public void disabletable(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.disableTable(tablename);
    }

10. Enable a table

    public void enabletable(String tablename) throws IOException{
        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.enableTable(tablename);
    }

11. Delete a table.

    public void deletetable(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.deleteTable(tablename);
    }

12. Check if a table is available

    public void isTableAvailable(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.isTableAvailable(tablename);
        System.out.println("Table " + tablename + " available? " + result);
    }

13. Check if a table is enabled

    public void isTableEnabled(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.isTableEnabled(tablename);
        System.out.println("Table " + tablename + " enabled? " + result);
    }

14. Check if a table is disabled

    public void isTableDisabled(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.isTableDisabled(tablename);
        System.out.println("Table " + tablename + " disabled? " + result);
    }

15. Check if a table exists.

    public void tableExists(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.tableExists(tablename);
        System.out.println("Table " + tablename + " exists? " + result);
    }

16. List all tables

    public void listTables () throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        for (HTableDescriptor t : admin.listTables()){
            System.out.println(t.getNameAsString());
        }
    }

17. Flush a table.

    public void flush(String trname) throws IOException, InterruptedException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.flush(trname);
    }

18. Shutdown hbase.

    public void shutdown() throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        System.out.println("Shutting down..");
        admin.shutdown();
    }

19. Modify column for a table.

    @SuppressWarnings("deprecation")
    public void modifyColumn(String tablename, String columnname, String descriptor) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.modifyColumn(tablename, columnname, new HColumnDescriptor(descriptor));

    }

20. Modify an existing table.

    public void modifyTable(String tablename, String newtablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        //Note: modifyTable replaces the table descriptor; it does not rename the table.
        admin.modifyTable(Bytes.toBytes(tablename), new HTableDescriptor(newtablename));
    }

21. Split based on tablename.

    public void split(String tablename) throws IOException, InterruptedException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.split(tablename);
    }

22. Check if master is running.

    public void isMasterRunning() throws MasterNotRunningException, ZooKeeperConnectionException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin administer = new HBaseAdmin(conf);
        System.out.println( "Master running ? "+ administer.isMasterRunning());
    }
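
Putting a few of these together, a minimal sketch of a session (again with placeholder names) could be:

    HbaseAdmin admin = new HbaseAdmin();
    admin.tableExists("testtable");
    //A table must be disabled before it can be deleted.
    admin.disabletable("testtable");
    admin.deletetable("testtable");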

There is lots more; you can check the Java API for HBase and build on these. These are the ones I found necessary.

The full listing of the code:

/*
 * Hbase administration basic tools.
 *
 * */

import java.io.IOException;
import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;

public class HbaseAdmin {

    public HbaseAdmin(){

    }

    public void addColumn (String tablename, String columnname) throws IOException{

        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.addColumn(tablename, new HColumnDescriptor (columnname));
        System.out.println("Added column : " + columnname + " to table " + tablename);
    }

    public void delColumn (String tablename, String columnname) throws IOException{

        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.deleteColumn(tablename, columnname);
        System.out.println("Deleted column : " + columnname + " from table " + tablename);
    }

    public void createTable (String tablename, String familyname) throws IOException {

        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);

        HTableDescriptor tabledescriptor = new HTableDescriptor(Bytes.toBytes(tablename));

        tabledescriptor.addFamily(new HColumnDescriptor (familyname));

        admin.createTable(tabledescriptor);

    }

    public void majorCompact (String mytable) throws IOException{

        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);

        try{
            //majorCompact is asynchronous; this only requests the compaction.
            admin.majorCompact(mytable);
            System.out.println("Major compaction requested for " + mytable);
        }catch(Exception e){
            System.out.println(e);
        }
    }

    public static void checkIfRunning() throws MasterNotRunningException, ZooKeeperConnectionException{
        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        //Check if HBase is running
        try{
            HBaseAdmin.checkHBaseAvailable(conf);
        }catch(Exception e){
            System.err.println("Exception at " + e);
            System.exit(1);
        }
    }

    public void minorcompact(String trname) throws IOException, InterruptedException{
        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.compact(trname);
    }

    public void deletetable(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.deleteTable(tablename);
    }

    public void disabletable(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.disableTable(tablename);
    }

    public void enabletable(String tablename) throws IOException{
        //Create the required configuration.
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.enableTable(tablename);
    }

    public void flush(String trname) throws IOException, InterruptedException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.flush(trname);
    }

    public ClusterStatus getclusterstatus () throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        return admin.getClusterStatus();
    }

    public void printClusterDetails() throws IOException{
        ClusterStatus status = getclusterstatus();

        Collection<HServerInfo> serverinfo = status.getServerInfo();
        for (HServerInfo s : serverinfo){
            System.out.println("Servername " + s.getServerName());
            System.out.println("Hostname " + s.getHostname());
            System.out.println("Hostname:Port " + s.getHostnamePort());
            System.out.println("Info port " + s.getInfoPort());
            System.out.println("Server load " + s.getLoad().toString());
            System.out.println();
        }

        String version = status.getHBaseVersion();
        System.out.println("Version " + version);

        int regioncounts = status.getRegionsCount();
        System.out.println("Region counts : " + regioncounts);

        int servers = status.getServers();
        System.out.println("Servers : " + servers);

        double averageload = status.getAverageLoad();
        System.out.println("Average load : " + averageload);

        int deadservers = status.getDeadServers();
        System.out.println("Dead servers : " + deadservers);

        Collection<String> servernames = status.getDeadServerNames();
        for (String s : servernames){
            System.out.println("Dead server name " + s);
        }
    }

    public void isTableAvailable(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.isTableAvailable(tablename);
        System.out.println("Table " + tablename + " available? " + result);
    }

    public void isTableEnabled(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.isTableEnabled(tablename);
        System.out.println("Table " + tablename + " enabled? " + result);
    }

    public void isTableDisabled(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.isTableDisabled(tablename);
        System.out.println("Table " + tablename + " disabled? " + result);
    }

    public void tableExists(String tablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        boolean result = admin.tableExists(tablename);
        System.out.println("Table " + tablename + " exists? " + result);
    }

    public void shutdown() throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        System.out.println("Shutting down..");
        admin.shutdown();
    }

    public void listTables () throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        for (HTableDescriptor t : admin.listTables()){
            System.out.println(t.getNameAsString());
        }
    }

    @SuppressWarnings("deprecation")
    public void modifyColumn(String tablename, String columnname, String descriptor) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.modifyColumn(tablename, columnname, new HColumnDescriptor(descriptor));

    }

    public void modifyTable(String tablename, String newtablename) throws IOException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        //Note: modifyTable replaces the table descriptor; it does not rename the table.
        admin.modifyTable(Bytes.toBytes(tablename), new HTableDescriptor(newtablename));
    }

    public void split(String tablename) throws IOException, InterruptedException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.split(tablename);
    }

    public void isMasterRunning() throws MasterNotRunningException, ZooKeeperConnectionException{
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin administer = new HBaseAdmin(conf);
        System.out.println( "Master running ? "+ administer.isMasterRunning());
    }

    public static void main (String[] args) throws IOException{

        HbaseAdmin admin = new HbaseAdmin();

        //Check if HBase is running properly
        HbaseAdmin.checkIfRunning();
        admin.printClusterDetails();

        //other functions based on arguments.
    }
}

And hey, I'll update this post soon with more details, especially about compaction! Till then, cheers! 🙂


An HDFSClient for Hadoop using the native Java API, a tutorial

I'd like to talk about doing some day-to-day administrative tasks on the Hadoop system. Although the hadoop fs <commands> can get you to do most of the things, it's still worthwhile to explore the rich Java API for Hadoop. This post is by no means complete, but it can get you started well.

The most basic step is to create an object of this class.

HDFSClient client = new HDFSClient();

Of course, you need to import a bunch of stuff. But if you are using an IDE like Eclipse, you'll follow along just fine by importing these. This should work fine for the entire code.

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

1. Copying from the local file system to HDFS.
Copies a local file onto HDFS. You do have the hadoop file system command to do the same.

hadoop fs -copyFromLocal <local fs> <hadoop fs>

I am not explaining much here as the comments are quite helpful. Of course, while adding the configuration files, make sure to point them to your Hadoop installation's location. For mine, it looks like this:

Configuration conf = new Configuration();
conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

FileSystem fileSystem = FileSystem.get(conf);

This is what it looks like with the Java API:

public void copyFromLocal (String source, String dest) throws IOException {

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    Path dstPath = new Path(dest);
    // Check if the destination directory exists
    if (!(fileSystem.exists(dstPath))) {
        System.out.println("No such destination " + dstPath);
        return;
    }

    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    try {
        fileSystem.copyFromLocalFile(srcPath, dstPath);
        System.out.println("File " + filename + " copied to " + dest);
    } catch (Exception e) {
        System.err.println("Exception caught! :" + e);
        System.exit(1);
    } finally {
        fileSystem.close();
    }
}
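
A quick usage sketch (the paths are just placeholders; the destination directory must already exist on HDFS):

HDFSClient client = new HDFSClient();
client.copyFromLocal("/home/hadoop/sample.txt", "/user/hadoop");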

2. Copying files from HDFS to the local file system.

The hadoop fs command is the following.

hadoop fs -copyToLocal <hadoop fs> <local fs>

Alternatively,

hadoop fs -get <hadoop fs> <local fs>

public void copyToLocal (String source, String dest) throws IOException {

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    Path dstPath = new Path(dest);
    // Check if the source file exists
    if (!(fileSystem.exists(srcPath))) {
        System.out.println("No such source " + srcPath);
        return;
    }

    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    try {
        fileSystem.copyToLocalFile(srcPath, dstPath);
        System.out.println("File " + filename + " copied to " + dest);
    } catch (Exception e) {
        System.err.println("Exception caught! :" + e);
        System.exit(1);
    } finally {
        fileSystem.close();
    }
}

3. Renaming a file in HDFS.

You can use the mv command in this context.

hadoop fs -mv <this name> <new name>

public void renameFile (String fromthis, String tothis) throws IOException{
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path fromPath = new Path(fromthis);
    Path toPath = new Path(tothis);

    if (!(fileSystem.exists(fromPath))) {
        System.out.println("No such source " + fromPath);
        return;
    }

    if (fileSystem.exists(toPath)) {
        System.out.println("Already exists! " + toPath);
        return;
    }

    try {
        boolean isRenamed = fileSystem.rename(fromPath, toPath);
        if (isRenamed) {
            System.out.println("Renamed from " + fromthis + " to " + tothis);
        }
    } catch (Exception e) {
        System.out.println("Exception :" + e);
        System.exit(1);
    } finally {
        fileSystem.close();
    }
}

4. Upload or add a file to HDFS

public void addFile(String source, String dest) throws IOException {

    // Conf object will read the HDFS configuration parameters
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    // Create the destination path including the filename.
    if (dest.charAt(dest.length() - 1) != '/') {
        dest = dest + "/" + filename;
    } else {
        dest = dest + filename;
    }

    // Check if the file already exists
    Path path = new Path(dest);
    if (fileSystem.exists(path)) {
        System.out.println("File " + dest + " already exists");
        return;
    }

    // Create a new file and write data to it.
    FSDataOutputStream out = fileSystem.create(path);
    InputStream in = new BufferedInputStream(new FileInputStream(new File(source)));

    byte[] b = new byte[1024];
    int numBytes = 0;
    while ((numBytes = in.read(b)) > 0) {
        out.write(b, 0, numBytes);
    }

    // Close all the file descriptors
    in.close();
    out.close();
    fileSystem.close();
}

5. Delete a file from HDFS.

You can use the following:

For removing a directory or a file:

hadoop fs -rmr <hdfs path>

If you want to skip the trash also, use:

hadoop fs -rmr -skipTrash <hdfs path>

public void deleteFile(String file) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(file);
    if (!fileSystem.exists(path)) {
        System.out.println("File " + file + " does not exist");
        return;
    }

    // The boolean flag makes the delete recursive for directories.
    fileSystem.delete(path, true);

    fileSystem.close();
}

6. Get the modification time of a file in HDFS.

The modification time comes back as plain milliseconds since the epoch; one way of formatting it is sketched after the method. If you have a neater idea, let me know. 🙂

public void getModificationTime(String source) throws IOException{

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    // Check if the file exists
    if (!(fileSystem.exists(srcPath))) {
        System.out.println("No such file " + srcPath);
        return;
    }
    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    FileStatus fileStatus = fileSystem.getFileStatus(srcPath);
    long modificationTime = fileStatus.getModificationTime();

    System.out.format("File %s; Modification time : %d%n", filename, modificationTime);
}
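
If you want something more readable than raw milliseconds, one option is to wrap the value in a java.util.Date and format it. A minimal sketch, reusing the modificationTime variable from above (the date pattern is just an example):

// Format the epoch-millisecond timestamp as a human-readable date.
java.text.SimpleDateFormat fmt = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
System.out.println("Modification time: " + fmt.format(new java.util.Date(modificationTime)));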

7. Get the block locations of a file in HDFS.

public void getBlockLocations(String source) throws IOException{

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    // Check if the file exists
    if (!(ifExists(srcPath))) {
        System.out.println("No such file " + srcPath);
        return;
    }
    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    FileStatus fileStatus = fileSystem.getFileStatus(srcPath);

    BlockLocation[] blkLocations = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    int blkCount = blkLocations.length;

    System.out.println("File " + filename + " stored at:");
    for (int i = 0; i < blkCount; i++) {
        String[] hosts = blkLocations[i].getHosts();
        System.out.format("Block %d stored on: %s%n", i, Arrays.toString(hosts));
    }
}

8. List all the datanodes in terms of hostnames.
This is neater than looking up the /etc/hosts file on the namenode.

public void getHostnames () throws IOException{
    Configuration config = new Configuration();
    config.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fs = FileSystem.get(config);
    DistributedFileSystem hdfs = (DistributedFileSystem) fs;
    DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();

    String[] names = new String[dataNodeStats.length];
    for (int i = 0; i < dataNodeStats.length; i++) {
        names[i] = dataNodeStats[i].getHostName();
        System.out.println(dataNodeStats[i].getHostName());
    }
}

9. Create a new directory in HDFS.
Creating a directory will be done as:

hadoop fs -mkdir <hadoop fs path>

public void mkdir(String dir) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(dir);
    if (fileSystem.exists(path)) {
        System.out.println("Dir " + dir + " already exists!");
        return;
    }

    fileSystem.mkdirs(path);

    fileSystem.close();
}

10. Read a file from HDFS

public void readFile(String file) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(file);
    if (!fileSystem.exists(path)) {
        System.out.println("File " + file + " does not exist");
        return;
    }

    FSDataInputStream in = fileSystem.open(path);

    // The local copy takes the same name as the HDFS file.
    String filename = file.substring(file.lastIndexOf('/') + 1, file.length());

    OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(filename)));

    byte[] b = new byte[1024];
    int numBytes = 0;
    while ((numBytes = in.read(b)) > 0) {
        out.write(b, 0, numBytes);
    }

    in.close();
    out.close();
    fileSystem.close();
}

11. Checking if a file exists in HDFS

public boolean ifExists (Path source) throws IOException{

    Configuration config = new Configuration();
    config.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem hdfs = FileSystem.get(config);
    boolean isExists = hdfs.exists(source);
    return isExists;
}

I know this is in no way complete, but this is already a rather long post. I hope it is useful. Responses appreciated!
And here is the complete code for HDFSClient.java. Happy Hadooping! 🙂

/*
Feel free to use, copy and distribute this program in any form.
HDFSClient.java
https://linuxjunkies.wordpress.com/
2011
*/

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class HDFSClient {

public HDFSClient() {

}

public static void printUsage(){
    System.out.println("Usage: hdfsclient add <local_path> <hdfs_path>");
    System.out.println("Usage: hdfsclient read <hdfs_path>");
    System.out.println("Usage: hdfsclient delete <hdfs_path>");
    System.out.println("Usage: hdfsclient mkdir <hdfs_path>");
    System.out.println("Usage: hdfsclient copyfromlocal <local_path> <hdfs_path>");
    System.out.println("Usage: hdfsclient copytolocal <hdfs_path> <local_path>");
    System.out.println("Usage: hdfsclient rename <old_hdfs_path> <new_hdfs_path>");
    System.out.println("Usage: hdfsclient modificationtime <hdfs_path>");
    System.out.println("Usage: hdfsclient getblocklocations <hdfs_path>");
    System.out.println("Usage: hdfsclient gethostnames");
}

public boolean ifExists (Path source) throws IOException{

    Configuration config = new Configuration();
    config.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem hdfs = FileSystem.get(config);
    boolean isExists = hdfs.exists(source);
    return isExists;
}

public void getHostnames () throws IOException{
    Configuration config = new Configuration();
    config.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    config.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fs = FileSystem.get(config);
    DistributedFileSystem hdfs = (DistributedFileSystem) fs;
    DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();

    String[] names = new String[dataNodeStats.length];
    for (int i = 0; i < dataNodeStats.length; i++) {
        names[i] = dataNodeStats[i].getHostName();
        System.out.println(dataNodeStats[i].getHostName());
    }
}

public void getBlockLocations(String source) throws IOException{

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    // Check if the file exists
    if (!(ifExists(srcPath))) {
        System.out.println("No such file " + srcPath);
        return;
    }
    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    FileStatus fileStatus = fileSystem.getFileStatus(srcPath);

    BlockLocation[] blkLocations = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    int blkCount = blkLocations.length;

    System.out.println("File " + filename + " stored at:");
    for (int i = 0; i < blkCount; i++) {
        String[] hosts = blkLocations[i].getHosts();
        System.out.format("Block %d stored on: %s%n", i, Arrays.toString(hosts));
    }
}

public void getModificationTime(String source) throws IOException{

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    // Check if the file exists
    if (!(fileSystem.exists(srcPath))) {
        System.out.println("No such file " + srcPath);
        return;
    }
    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    FileStatus fileStatus = fileSystem.getFileStatus(srcPath);
    long modificationTime = fileStatus.getModificationTime();

    System.out.format("File %s; Modification time : %d%n", filename, modificationTime);
}

public void copyFromLocal (String source, String dest) throws IOException {

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    Path dstPath = new Path(dest);
    // Check if the destination directory exists
    if (!(fileSystem.exists(dstPath))) {
        System.out.println("No such destination " + dstPath);
        return;
    }

    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    try {
        fileSystem.copyFromLocalFile(srcPath, dstPath);
        System.out.println("File " + filename + " copied to " + dest);
    } catch (Exception e) {
        System.err.println("Exception caught! :" + e);
        System.exit(1);
    } finally {
        fileSystem.close();
    }
}

public void copyToLocal (String source, String dest) throws IOException {

    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path srcPath = new Path(source);

    Path dstPath = new Path(dest);
    // Check if the source file exists
    if (!(fileSystem.exists(srcPath))) {
        System.out.println("No such source " + srcPath);
        return;
    }

    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    try {
        fileSystem.copyToLocalFile(srcPath, dstPath);
        System.out.println("File " + filename + " copied to " + dest);
    } catch (Exception e) {
        System.err.println("Exception caught! :" + e);
        System.exit(1);
    } finally {
        fileSystem.close();
    }
}

public void renameFile (String fromthis, String tothis) throws IOException{
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);
    Path fromPath = new Path(fromthis);
    Path toPath = new Path(tothis);

    if (!(fileSystem.exists(fromPath))) {
        System.out.println("No such source " + fromPath);
        return;
    }

    if (fileSystem.exists(toPath)) {
        System.out.println("Already exists! " + toPath);
        return;
    }

    try {
        boolean isRenamed = fileSystem.rename(fromPath, toPath);
        if (isRenamed) {
            System.out.println("Renamed from " + fromthis + " to " + tothis);
        }
    } catch (Exception e) {
        System.out.println("Exception :" + e);
        System.exit(1);
    } finally {
        fileSystem.close();
    }
}

public void addFile(String source, String dest) throws IOException {

    // Conf object will read the HDFS configuration parameters
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    // Get the filename out of the file path
    String filename = source.substring(source.lastIndexOf('/') + 1, source.length());

    // Create the destination path including the filename.
    if (dest.charAt(dest.length() - 1) != '/') {
        dest = dest + "/" + filename;
    } else {
        dest = dest + filename;
    }

    // Check if the file already exists
    Path path = new Path(dest);
    if (fileSystem.exists(path)) {
        System.out.println("File " + dest + " already exists");
        return;
    }

    // Create a new file and write data to it.
    FSDataOutputStream out = fileSystem.create(path);
    InputStream in = new BufferedInputStream(new FileInputStream(new File(source)));

    byte[] b = new byte[1024];
    int numBytes = 0;
    while ((numBytes = in.read(b)) > 0) {
        out.write(b, 0, numBytes);
    }

    // Close all the file descriptors
    in.close();
    out.close();
    fileSystem.close();
}

public void readFile(String file) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(file);
    if (!fileSystem.exists(path)) {
        System.out.println("File " + file + " does not exist");
        return;
    }

    FSDataInputStream in = fileSystem.open(path);

    // The local copy takes the same name as the HDFS file.
    String filename = file.substring(file.lastIndexOf('/') + 1, file.length());

    OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(filename)));

    byte[] b = new byte[1024];
    int numBytes = 0;
    while ((numBytes = in.read(b)) > 0) {
        out.write(b, 0, numBytes);
    }

    in.close();
    out.close();
    fileSystem.close();
}

public void deleteFile(String file) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(file);
    if (!fileSystem.exists(path)) {
        System.out.println("File " + file + " does not exist");
        return;
    }

    // The boolean flag makes the delete recursive for directories.
    fileSystem.delete(path, true);

    fileSystem.close();
}

public void mkdir(String dir) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("/home/hadoop/hadoop/conf/core-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/hdfs-site.xml"));
    conf.addResource(new Path("/home/hadoop/hadoop/conf/mapred-site.xml"));

    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(dir);
    if (fileSystem.exists(path)) {
        System.out.println("Dir " + dir + " already exists!");
        return;
    }

    fileSystem.mkdirs(path);

    fileSystem.close();
}

public static void main(String[] args) throws IOException {

    if (args.length < 1) {
        printUsage();
        System.exit(1);
    }

    HDFSClient client = new HDFSClient();
    if (args[0].equals("add")) {
        if (args.length < 3) {
            System.out.println("Usage: hdfsclient add <local_path> <hdfs_path>");
            System.exit(1);
        }
        client.addFile(args[1], args[2]);

    } else if (args[0].equals("read")) {
        if (args.length < 2) {
            System.out.println("Usage: hdfsclient read <hdfs_path>");
            System.exit(1);
        }
        client.readFile(args[1]);

    } else if (args[0].equals("delete")) {
        if (args.length < 2) {
            System.out.println("Usage: hdfsclient delete <hdfs_path>");
            System.exit(1);
        }
        client.deleteFile(args[1]);

    } else if (args[0].equals("mkdir")) {
        if (args.length < 2) {
            System.out.println("Usage: hdfsclient mkdir <hdfs_path>");
            System.exit(1);
        }
        client.mkdir(args[1]);

    } else if (args[0].equals("copyfromlocal")) {
        if (args.length < 3) {
            System.out.println("Usage: hdfsclient copyfromlocal <from_local_path> <to_hdfs_path>");
            System.exit(1);
        }
        client.copyFromLocal(args[1], args[2]);

    } else if (args[0].equals("rename")) {
        if (args.length < 3) {
            System.out.println("Usage: hdfsclient rename <old_hdfs_path> <new_hdfs_path>");
            System.exit(1);
        }
        client.renameFile(args[1], args[2]);

    } else if (args[0].equals("copytolocal")) {
        if (args.length < 3) {
            System.out.println("Usage: hdfsclient copytolocal <from_hdfs_path> <to_local_path>");
            System.exit(1);
        }
        client.copyToLocal(args[1], args[2]);

    } else if (args[0].equals("modificationtime")) {
        if (args.length < 2) {
            System.out.println("Usage: hdfsclient modificationtime <hdfs_path>");
            System.exit(1);
        }
        client.getModificationTime(args[1]);

    } else if (args[0].equals("getblocklocations")) {
        if (args.length < 2) {
            System.out.println("Usage: hdfsclient getblocklocations <hdfs_path>");
            System.exit(1);
        }
        client.getBlockLocations(args[1]);

    } else if (args[0].equals("gethostnames")) {
        client.getHostnames();

    } else {
        printUsage();
        System.exit(1);
    }

    System.out.println("Done!");
}
}