Tuesday, October 27, 2015

Read Content of Bzip file

INTRODUCTION

This post shows how to decompress a bz2 file that contains a list of XML documents and generate JSON from it.

Assume that the content of each XML file is a single-line string.

SOFTWARE & TOOLS

  1. Eclipse.
  2. Commons-compress-1.10.jar
  3. Jackson-all-1.9.0.jar

My Input files are: Employees1.xml.bz2 & Employees2.xml.bz2
My Output files are: Employees1.json & Employees2.json

IMPLEMENTATION


  1. package jbr.json_xmlbzip;  
  2.   
  3. import java.io.BufferedInputStream;  
  4. import java.io.BufferedReader;  
  5. import java.io.File;  
  6. import java.io.FileInputStream;  
  7. import java.io.FileNotFoundException;  
  8. import java.io.FileOutputStream;  
  9. import java.io.IOException;  
  10. import java.io.InputStreamReader;  
  11. import java.util.ArrayList;  
  12. import java.util.List;  
  13. import java.util.Map;  
  14. import java.util.TreeMap;  
  15.   
  16. import javax.xml.parsers.DocumentBuilder;  
  17. import javax.xml.parsers.DocumentBuilderFactory;  
  18. import javax.xml.parsers.ParserConfigurationException;  
  19. import javax.xml.xpath.XPath;  
  20. import javax.xml.xpath.XPathConstants;  
  21. import javax.xml.xpath.XPathExpressionException;  
  22. import javax.xml.xpath.XPathFactory;  
  23.   
  24. import org.apache.commons.compress.compressors.CompressorException;  
  25. import org.apache.commons.compress.compressors.CompressorInputStream;  
  26. import org.apache.commons.compress.compressors.CompressorStreamFactory;  
  27. import org.codehaus.jackson.JsonGenerationException;  
  28. import org.codehaus.jackson.map.JsonMappingException;  
  29. import org.codehaus.jackson.map.ObjectMapper;  
  30. import org.w3c.dom.Document;  
  31. import org.w3c.dom.Node;  
  32. import org.w3c.dom.NodeList;  
  33. import org.xml.sax.SAXException;  
  34.   
  35. public class JsonFromBzipXml {  
  36.   
  37.   public static FileInputStream fis;  
  38.   public static DocumentBuilderFactory docBuilderFactory;  
  39.   public static DocumentBuilder docBuilder;  
  40.   public static Document xmlDocument;  
  41.   public static XPath xpathObj;  
  42.   
  43.   public void loadConfig(File inputFile) {  
  44.     try {  
  45.       fis = new FileInputStream(inputFile);  
  46.       docBuilderFactory = DocumentBuilderFactory.newInstance();  
  47.       docBuilder = docBuilderFactory.newDocumentBuilder();  
  48.       xmlDocument = docBuilder.parse(fis);  
  49.       xpathObj = XPathFactory.newInstance().newXPath();  
  50.     } catch (FileNotFoundException fe) {  
  51.       fe.printStackTrace();  
  52.     } catch (ParserConfigurationException pe) {  
  53.       pe.printStackTrace();  
  54.     } catch (IOException ie) {  
  55.       ie.printStackTrace();  
  56.     } catch (SAXException se) {  
  57.       se.printStackTrace();  
  58.     }  
  59.   }  
  60.   
  61.   /** 
  62.    * Start the Json Conversion 
  63.    *  
  64.    * @param args 
  65.    * @throws IOException 
  66.    * @throws CompressorException 
  67.    */  
  68.   public static void main(String[] args) throws IOException, CompressorException, XPathExpressionException {  
  69.     JsonFromBzipXml converter = new JsonFromBzipXml();  
  70.     converter.generateJson();  
  71.   }  
  72.   
  73.   public void generateJson() throws XPathExpressionException {  
  74.     Map<String, List<Map<String, String>>> employees = new TreeMap<>();  
  75.     Map<String, String> employee = null;  
  76.   
  77.     File xmlFile = null;  
  78.     FileInputStream fileInputStream = null;  
  79.     BufferedInputStream bufferedInputStream = null;  
  80.     CompressorInputStream compressorInputStream = null;  
  81.     BufferedReader bufferedReader = null;  
  82.   
  83.     try {  
  84.       File[] bzipFiles = new File(JsonConstants.INPUT_BZIP_DIRECTORY).listFiles();  
  85.   
  86.       for (File bzipFile : bzipFiles) {  
  87.   
  88.         List<Map<String, String>> empList = new ArrayList<>();  
  89.   
  90.         // Unzip bzip Files  
  91.         fileInputStream = new FileInputStream(bzipFile);  
  92.         bufferedInputStream = new BufferedInputStream(fileInputStream);  
  93.         compressorInputStream = new CompressorStreamFactory().createCompressorInputStream(bufferedInputStream);  
  94.         bufferedReader = new BufferedReader(new InputStreamReader(compressorInputStream));  
  95.   
  96.         String line = null;  
  97.   
  98.         // System.out.println(bzipFile);  
  99.   
  100.         while ((line = bufferedReader.readLine()) != null) {  
  101.           employee = new TreeMap<>();  
  102.           xmlFile = new File(bzipFile.getName().replace(".bz2"""));  
  103.           xmlFile.createNewFile();  
  104.           FileOutputStream fileOutputStream = new FileOutputStream(xmlFile);  
  105.           fileOutputStream.write(line.getBytes());  
  106.           // System.out.println(line);  
  107.   
  108.           // Load the xml file  
  109.           loadConfig(xmlFile);  
  110.   
  111.           System.out.println("\n===Retrieve all Elements of an Employee====");  
  112.           String xpath = "/Employees/Employee";  
  113.           System.out.println("xpath: " + xpath);  
  114.           Node node = (Node) xpathObj.compile(xpath).evaluate(xmlDocument, XPathConstants.NODE);  
  115.           if (null != node) {  
  116.             NodeList nodeList = node.getChildNodes();  
  117.   
  118.             for (int i = 0null != nodeList && i < nodeList.getLength(); i++) {  
  119.               Node subNode = nodeList.item(i);  
  120.               if (subNode.getNodeType() == Node.ELEMENT_NODE)  
  121.                 System.out.println(nodeList.item(i).getNodeName() + " : " + subNode.getFirstChild().getNodeValue());  
  122.               employee.put(nodeList.item(i).getNodeName(), subNode.getFirstChild().getNodeValue());  
  123.             }  
  124.           }  
  125.   
  126.           fileOutputStream.close();  
  127.         } // end while  
  128.   
  129.         empList.add(employee);  
  130.         employees.put("employees", empList);  
  131.         toJsonFromMap(employees, JsonConstants.OUTPUT_JSON_DIRECTORY + xmlFile.getName().replace(".xml"".json"));  
  132.   
  133.       } // end for  
  134.   
  135.     } catch (IOException ioException) {  
  136.       ioException.printStackTrace();  
  137.     } catch (CompressorException ce) {  
  138.       ce.printStackTrace();  
  139.     }  
  140.   }  
  141.   
  142.   public static void toJsonFromMap(Map map, String outputFile) {  
  143.     try {  
  144.       new ObjectMapper().writeValue(new File(outputFile), map);  
  145.     } catch (JsonGenerationException e) {  
  146.       e.printStackTrace();  
  147.     } catch (JsonMappingException e) {  
  148.       e.printStackTrace();  
  149.     } catch (IOException e) {  
  150.       e.printStackTrace();  
  151.     }  
  152.   }  
  153.   
  154.   public static String toJsonFromList1(List list) {  
  155.     String value = "";  
  156.     try {  
  157.       value = new ObjectMapper().writeValueAsString(list);  
  158.     } catch (JsonGenerationException e) {  
  159.       e.printStackTrace();  
  160.     } catch (JsonMappingException e) {  
  161.       e.printStackTrace();  
  162.     } catch (IOException e) {  
  163.       e.printStackTrace();  
  164.     }  
  165.   
  166.     return value;  
  167.   }  
  168.   
  169.   public interface JsonConstants {  
  170.     String INPUT_BZIP_DIRECTORY = "input/bzip";  
  171.     String OUTPUT_JSON_DIRECTORY = "output/bzip-jsons/";  
  172.   }  
  173. }  



OUTPUT


Console


===Retrieve all Elements of an Employee====
xpath: /Employees/Employee
firstname : Ranjith
lastname : Sekar
age : 30
address : Chennai
email : ranjith@gmail.com

===Retrieve all Elements of an Employee====
xpath: /Employees/Employee
firstname : Ranjith
lastname : Sekar
age : 30
address : Chennai
email : ranjith@gmail.com

JSON File


{"employees":[{"address":"Chennai","age":"30","email":"ranjith@gmail.com","firstname":"Ranjith","lastname":"Sekar"}]}

No comments :

Post a Comment