1. Modify the retention time of kafka data in the topic (default is 7 days) ------------------------------------------------- [kafka/conf/server.properties] log.retention.hours=1 2. aggregate query using hive ---------------------------------------------------- 1.hive command line query // Query all caller = xxx calls and group them according to calltime select count(*) , calltime from ext_calllogs_in_hbase where caller = '15338597777' group by calltime ; // Query all caller = xxx calls and group them by month select count(*) , substr(calltime,1,6) from ext_calllogs_in_hbase where caller = '15338597777' group by substr(calltime,1,6) ; // Query caller = xxx, year is 2018, all call records grouped by month select count(*) , substr(calltime,1,6) from ext_calllogs_in_hbase where caller = '15338597777' and substr(calltime,1,4) == '2018' group by substr(calltime,1,6) ; // Query the number of calls per month for all users select caller,substr(calltime,1,6) , count(*) from ext_calllogs_in_hbase group by caller , substr(calltime,1,6) ; 2. Programming implementation: the number of calls per month in a given year a. Adding new methods to the HiveService class
/** * Query the number of calls made in each month of the designated person's designated year */ public List<CalllogStat> statCalllogsCount(String caller, String year){ List<CalllogStat> list = new ArrayList<CalllogStat>() ; try { Connection conn = DriverManager.getConnection(url); Statement st = conn.createStatement(); //String: select count (*), substr (call time, 1,6) from ext_calllogs_in_hbase where caller = 15338597777' // and substr(calltime,1,4) == '2018' group by substr(calltime,1,6) ; String sql = "select count(*) ,substr(calltime,1,6) from ext_calllogs_in_hbase " + "where caller = '" + caller+"' and substr(calltime,1,4) == '" + year + "' group by substr(calltime,1,6)"; ResultSet rs = st.executeQuery(sql); Calllog log = null; while (rs.next()) { CalllogStat logSt = new CalllogStat(); logSt.setCount(rs.getInt(1)); logSt.setYearMonth(rs.getString(2)); list.add(logSt); } rs.close(); return list; } catch (Exception e) { e.printStackTrace(); } return null; }
b. Adding a new controller to Controller
/**
 * Shows the statistics query form page.
 */
@RequestMapping("/calllog/toStatCalllog")
public String toStatCalllog() {
    return "calllog/statCalllog";
}

/**
 * Runs the per-month call-count statistics for one caller and one year,
 * exposes the result to the view under "stat", and renders the stats page.
 */
@RequestMapping("/calllog/statCalllog")
public String statCalllog(Model m,
                          @RequestParam("caller") String caller,
                          @RequestParam("year") String year) {
    List<CalllogStat> stats = hcs.statCalllogsCount(caller, year);
    m.addAttribute("stat", stats);
    return "calllog/statCalllog";
}
c. Writing Front-end Interface Display
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %>
<html>
<head>
    <title>Statistical results of call records</title>
    <link rel="stylesheet" type="text/css" href="../css/my.css">
    <script type="text/javascript" src="../js/jquery-3.2.0.min.js"></script>
    <%-- NOTE(review): the original page carried a 2-second auto-refresh
         copied from the live call-log page; it emptied this table's
         server-rendered stat rows and appended rows with caller/callee
         columns that do not match the Month/frequency header, so it has
         been removed. --%>
</head>
<body>
<form action='<c:url value="/calllog/statCalllog" />' method="post">
    Telephone number: <input type="text" name="caller"><br>
    Year: <input type="text" name="year"><br>
    <input type="submit" name="query">
</form>
<br>
<table id="t1" border="1px" class="t-1" style="width: 800px">
    <thead>
    <tr>
        <td>Month</td>
        <td>frequency</td>
    </tr>
    </thead>
    <tbody>
    <%-- One row per month, filled by the statCalllog controller. --%>
    <c:forEach items="${stat}" var="s">
        <tr>
            <td><c:out value="${s.yearMonth}"/></td>
            <td><c:out value="${s.count}"/></td>
        </tr>
    </c:forEach>
    </tbody>
</table>
</body>
</html>
d. Run ssm app and enter the website for testing 3. Linux uses awk command to kill processes in batches according to java process name ------------------------------------------------------ 1.awk reads the file line by line, slices each line with space as the default delimiter, and then analyzes the cut parts. $> jps | awk '{print $1}'; // print the first column jps | awk -F'.' '{print $1}'; // specify delimiter '.' (default is blank) 2.shell programming closes kafka $> kill -9 `jps | grep Kafka | awk '{print $1}'`; 3. Dynamic extraction of ip $> ifconfig | grep inet | head -1 | awk '{print $2}'; 4. Modification of Quick bash Script ------------------------------------------------------- 1.[xkill.sh] #!/bin/bash pids=`jps | grep $1 | awk '{print $1}'` for pid in $pids ; do kill -9 $pid done 2.[xcall.sh] #!/bin/bash params=$@ i=201 for (( i=201 ; i <= 206 ; i = $i + 1 )) ; do tput setaf 2 echo ============= s$i ============= tput setaf 7 ssh -4 s$i "source /etc/profile ; $params" done 3. Open kafka cluster [/usr/local/bin/xkafka-cluster-start.sh] #!/bin/bash servers="s200 s300 s400" for s in $servers ; do ssh $s "source /etc/profile ; kafka-server-start.sh -daemon /soft/kafka/config/server.properties" done 4. Start zk cluster [/usr/local/bin/xzk-cluster-start.sh] #!/bin/bash servers="s100 s200 s300" for s in $servers ; do ssh $s "source /etc/profile ; zkServer.sh start" done 5.xconsumer-start.sh [/usr/local/bin/xconsumer-start.sh] #!/bin/bash cd /home/centos/KafkaHbaseConsumer run.sh & 6.s201:xflume-calllog-start.sh [/usr/local/bin/xflume-calllog-start.sh] #!/bin/bash cd /soft/flume/conf flume-ng agent -f calllog.conf -n a1 & 5. Modify Calllog Controller to solve the problem of json scrambling ------------------------------------------------------------
/** * Simulate the bottom request and response to return json directly to the front page */ @RequestMapping("calllog/json/findAll") public String findAllJson(HttpServletResponse response) { try { List<Calllog> list = cs.findAll(); String jsonStr = JSONArray.toJSONString(list); //Set the data type of the response to be a json string response.setContentType("application/json"); response.setCharacterEncoding("utf-8"); //Get the output stream sent to the client ServletOutputStream sos = response.getOutputStream(); sos.write(jsonStr.getBytes("utf-8")); sos.flush(); sos.close(); } catch (IOException e) { e.printStackTrace(); } return null; }
6. Using echarts to realize data visualization--histogram ----------------------------------------------------- 1. Download the echarts script and map the ssm project in the web/js directory http://echarts.baidu.com/dist/echarts.js 2.echarts introductory demonstration bar.html -- bar chart
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>bar.html</title>
    <script src="../js/jquery-3.2.0.min.js"></script>
    <script src="../js/echarts.js"></script>
    <script>
        // Render a static demo bar chart into #main once the DOM is ready.
        $(function () {
            var chart = echarts.init(document.getElementById('main'));
            var chartOptions = {
                title: {text: 'xxxx2018 Annual monthly calls'},
                tooltip: {},
                legend: {data: ['number of calls']},
                // Hard-coded demo months and counts.
                xAxis: {data: ["1 Month", "2 Month", "3 Month"]},
                yAxis: {},
                series: [{
                    name: 'number of calls',
                    type: 'bar',
                    data: [100, 300, 280]
                }]
            };
            chart.setOption(chartOptions);
        });
    </script>
</head>
<body>
<div id="main" style="border:1px solid blue;width:600px;height:400px;">
</div>
</body>
</html>
3. JSP interface statCalllog.jsp for modifying statistics
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %>
<html>
<head>
    <title>Statistical results of call records</title>
    <link rel="stylesheet" type="text/css" href="../css/my.css">
    <script type="text/javascript" src="../js/jquery-3.2.0.min.js"></script>
    <script src="../js/echarts.js"></script>
    <script>
        // Render the per-month bar chart from the model attributes that the
        // statCalllog controller placed into the request ("title", "list").
        $(function () {
            var myChart = echarts.init(document.getElementById('main'));
            var option = {
                title: {
                    text: '<c:out value="${title}" />'
                },
                tooltip: {},
                legend: {
                    data: ['number of calls']
                },
                // JSTL expands to one quoted yyyyMM label per month.
                xAxis: {
                    data: [<c:forEach items="${list}" var="e">'<c:out value="${e.yearMonth}"/>',</c:forEach>]
                },
                yAxis: {},
                series: [{
                    name: 'number of calls',
                    type: 'bar',
                    data: [<c:forEach items="${list}" var="e"><c:out value="${e.count}"/>,</c:forEach>]
                }]
            };
            myChart.setOption(option);
        });
    </script>
</head>
<body>
<form action='<c:url value="/calllog/statCalllog" />' method="post">
    Telephone number: <input type="text" name="caller"><br>
    Year: <input type="text" name="year"><br>
    <input type="submit" name="query">
</form>
<div id="main" style="border:1px solid blue;width:600px;height:400px;">
</div>
</body>
</html>
4. Modify CallLogController.java
/**
 * Counts the designated caller's calls for each month of the given year and
 * renders the bar-chart statistics page.
 *
 * Exposes two model attributes consumed by calllog/statCalllog.jsp:
 * "title" (chart heading) and "list" (per-month CalllogStat rows).
 */
@RequestMapping("/calllog/statCalllog")
public String statCalllog(Model m,
                          @RequestParam("caller") String caller,
                          @RequestParam("year") String year) {
    List<CalllogStat> list = hcs.statCalllogsCount(caller, year);
    // Fixed the garbled machine-translated heading ("stay" was a
    // mistranslation of the Chinese word for "in").
    m.addAttribute("title", caller + " - monthly call counts in " + year);
    m.addAttribute("list", list);
    return "calllog/statCalllog";
}
ganglia ----------------------------- 1. Introduction to ganglia Cluster monitoring. It can not only monitor the resource situation of a single host, but also count the whole resource of the cluster. gmond // collects resource data at each node. gmetad // Accepts each node to send resource data Gweb // webui, displaying data web programs, and communicating with gmetad. 2.Centos installs ganglia a.ganglia-gmond All nodes. $>sudo yum install -y ganglia-gmond $>sudo apt-get install -y ganglia-gmond b.ganglia-gmetad s201 $>sudo yum install -y ganglia-gmetad c.ganglia-gweb [s201] 1) Installation Dependence $>sudo yum install -y httpd php 2) Download the ganglia-web-3.5.12.tar.gz program wget http://ncu.dl.sourceforge.net/project/ganglia/ganglia-web/3.5.12/ganglia-web-3.5.12.tar.gz 3)tar open file 4) Modify Makefile files ... 5) Start up services [s201] $>sudo service httpd start $>sudo service gmetad start $>sudo service gmond start [s202] $>sudo service gmond start 3. Install in yum mode if there is no available source a. Source switching (aliyun ->) For big data ecosystem projects, cloudera has a complete repository and no bugs. Cloudera-cdh-5.repo ->/etc/yum.repos.d/ b. Clear the cache $>sudo yum clean all c. Rebuild the cache $>sudo yum makecache d. Continue to install through yum 4. Installation of ganglia in Ubuntu --------------------------------- 1. Install monitor and webfrontend on Master node s100 [slave node does not need to install web] $s100> sudo apt-get update $s100> sudo apt-get install ganglia-monitor rrdtool gmetad ganglia-webfrontend 2. Configure ganglia mycluster Copy the Ganglia webfrontend Apache configuration: $s100> sudo cp /etc/ganglia-webfrontend/apache.conf /etc/apache2/sites-enabled/ganglia.conf b. 
Edit the configuration file of Ganglia metadaemon: $s100> sudo nano /etc/ganglia/gmetad.conf Find "data_source" my cluster "localhost" Change to data_source "mycluster" 192.168.43.131:8649 // List the data source for machine services [ha cluster name], number of clusters, IP: port, if no port number 8649 (default gmond port) is specified. c. Edit the configuration file of the primary node $s100> sudo nano /etc/ganglia/gmond.conf [------] cluster { Name = mycluster // unspecified changed to mycluster owner = "unspecified" latlong = "unspecified" url = "unspecified" } [-----] /* Feel free to specify as many udp_send_channels as you like. Gmond used to only support having a single channel */ udp_send_channel { # mcast_join = 239.2.11.71 host = 192.168.43.131 // / Add a row, master ip port = 8649 ttl = 1 } [-----] /* You can specify as many udp_recv_channels as you like as well. */ udp_recv_channel { # mcast_join = 239.2.11.71// commented out port = 8649 # bind = 239.2.11.71// commented out } d. Save out 3. Configure slave node configuration [s200 s300 s400 s500] Copy master modified / etc/ganglia/gmond.conf to slave nodes to replace the original file. $s100> scp gmond.conf root@s300:/etc/ganglia 4. Start hadoop, hbase cluster start-dfs.sh start-yarn.sh start-hbase.sh 5. Start ganglia [method 1] Sudo service ganglia-monitor start Sudo service gmetad start Sudo/etc/init.d/apache 2 restart (restart Apache 2 on the host) [Mode 2] or: Ganglia installed in apt-get mode can be started directly by service mode $s100> sudo /etc/init.d/ganglia-monitor start $s100> sudo /etc/init.d/gmetad start $s100> sudo /etc/init.d/apache2 restart 6. Verify that the installation was successful http://localhost/ganglia/ 8. Using udp protocol to realize process monitoring ---------------------------------------------------------- 1. New class udp.HeartBeatThread in data generation module ---------------------------------------------------
package udp;

import calllog.gen.main.PropertiesUtil;

import java.io.IOException;
import java.net.*;

/**
 * Heartbeat sender: periodically broadcasts a one-byte UDP datagram so an
 * external monitor can tell that the data-generation program is still alive.
 *
 * All ports/addresses/intervals come from the properties file via
 * PropertiesUtil (heartbeat.udp.send.*).
 */
public class HeartBeatThread extends Thread {

    private DatagramSocket sock;

    public HeartBeatThread() {
        try {
            sock = new DatagramSocket(PropertiesUtil.getInt("heartbeat.udp.send.port"));
            // Daemon thread: do not keep the JVM alive just to send heartbeats.
            this.setDaemon(true);
        } catch (SocketException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void run() {
        // The constructor swallowed the SocketException; guard against the
        // null socket instead of dying with an NPE on the first send.
        if (sock == null) {
            return;
        }
        byte[] bs = new byte[1];
        bs[0] = (byte) PropertiesUtil.getInt("heartbeat.udp.send.flag");
        DatagramPacket packet = new DatagramPacket(bs, 1);
        String bcAddr = PropertiesUtil.getString("heartbeat.udp.send.bcAddr");
        int bcPort = PropertiesUtil.getInt("heartbeat.udp.send.bcPort");
        packet.setSocketAddress(new InetSocketAddress(bcAddr, bcPort));
        while (true) {
            try {
                sock.send(packet);
                Thread.sleep(PropertiesUtil.getInt("heartbeat.udp.send.sleep.ms"));
                System.out.println("Data generation module, send a heartbeat" + bs[0]);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop sending so the thread
                // can actually be terminated (the original looped forever).
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
2. Modify data to generate main function of App main program. When starting app, start monitoring thread at the same time.
public static void main(String [] args) { genCallLog(); //Open monitoring threads new HeartBeatThread().start(); }
3. Adding monitoring class com.ssm.monitor.MonitorService to the SM module to monitor heartbeat information sent by other processes
package com.ssm.monitor;

import com.it18zhang.ssm.domain.HeartBeat;
import com.it18zhang.ssm.util.PropertiesUtil;
import org.springframework.stereotype.Service;

import java.net.DatagramPacket;
import java.net.DatagramSocket;
import java.net.InetSocketAddress;
import java.net.SocketException;
import java.util.ArrayList;
import java.util.List;

/**
 * Service that tracks the heartbeat datagrams sent by the other programs.
 * Constructing the service spawns a background UDP receiver thread; callers
 * poll {@link #getHeartBeats()} for the latest state per sender.
 */
@Service("monitorService")
public class MonitorService extends Thread {

    private ReceiveThread receiver;

    public MonitorService() {
        // Launch the UDP listener as soon as Spring instantiates the service.
        receiver = new ReceiveThread();
        receiver.start();
    }

    /**
     * Snapshot list of the most recent heartbeat seen from every sender.
     */
    public List<HeartBeat> getHeartBeats() {
        return new ArrayList<HeartBeat>(receiver.map.values());
    }
}
4. Add the monitoring threading class com.ssm.monitor.ReceviceThread to the SM module
package com.ssm.monitor;

import com.it18zhang.ssm.domain.HeartBeat;
import com.it18zhang.ssm.util.PropertiesUtil;

import java.net.DatagramPacket;
import java.net.DatagramSocket;
import java.net.InetSocketAddress;
import java.net.SocketException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Background UDP listener that records, per sender ip, the most recent
 * heartbeat datagram received (flag byte + receive timestamp).
 */
public class ReceiveThread extends Thread {

    private DatagramSocket sock;

    // sender ip -> last heartbeat received. ConcurrentHashMap because this
    // thread writes while other threads (the web layer via MonitorService)
    // read concurrently — a plain HashMap is unsafe for that.
    public Map<String, HeartBeat> map = new ConcurrentHashMap<String, HeartBeat>();

    public ReceiveThread() {
        try {
            // Bind the configured heartbeat receive port.
            sock = new DatagramSocket(PropertiesUtil.getInt("heartbeat.udp.receive.port"));
            // Daemon thread: the listener must not keep the JVM alive.
            this.setDaemon(true);
            System.out.println("Start receiving heartbeat ...");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void run() {
        // The constructor swallowed the bind failure; without a socket there
        // is nothing to receive on (avoids an NPE loop).
        if (sock == null) {
            return;
        }
        byte[] bs = new byte[1];
        DatagramPacket packet = new DatagramPacket(bs, 1);
        while (true) {
            try {
                sock.receive(packet);
                int flag = bs[0];
                InetSocketAddress addr = (InetSocketAddress) packet.getSocketAddress();
                String sendIp = addr.getAddress().getHostAddress();
                map.put(sendIp, new HeartBeat(sendIp, flag, System.currentTimeMillis()));
                System.out.println("Receiving heartbeat" + flag);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
5. Add a new javabean class to the domain package of ssm to encapsulate heartbeat information HeartBeat class
package com.it18zhang.ssm.domain; public class HeartBeat { //udp sender's ip private String ip; //Message content sent private int flag; //Events at the last time a message was received private long ts; public HeartBeat() { } public HeartBeat(String ip, int flag, long ts) { this.ip = ip; this.flag = flag; this.ts = ts; } public String getIp() { return ip; } public void setIp(String ip) { this.ip = ip; } public int getFlag() { return flag; } public void setFlag(int flag) { this.flag = flag; } public long getTs() { return ts; } public void setTs(long ts) { this.ts = ts; } }
6. Copy heartbeat sending code HeartBeatThread and tool class in consumer module and modify kafka configuration file to add heartbeat attributes [kafka.properties] zookeeper.connect=s100:2181,s200:2181,s300:2181 group.id=calllog zookeeper.session.timeout.ms=500 zookeeper.sync.time.ms=250 auto.commit.interval.ms=1000 # Consume from the beginning auto.offset.reset=smallest # topic topic=calllog # table name table.name=call:calllogs # partition number partition.number=100 # caller flag caller.flag=0 # pattern for the hash region hashcode.pattern=00 heartbeat.udp.send.port=6666 heartbeat.udp.send.flag=3 heartbeat.udp.send.bcAddr=192.168.43.255 heartbeat.udp.send.bcPort=9999 heartbeat.udp.send.sleep.ms=1000 7. Modify the HbaseCustomer class to start the heartbeat sending thread in the main function
package calllog.kafka.hbase.customer;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * hbase consumer: pulls call-log messages from kafka and stores each one
 * in hbase via HbaseDao. Runs forever (the stream iterator blocks).
 */
public class HbaseCustomer {

    public static void main(String[] args) {
        // Start the heartbeat sender first so the monitor sees this
        // consumer as alive from the beginning.
        new HeartBeatThread().start();

        HbaseDao dao = new HbaseDao();

        // Build the consumer configuration once and reuse it (the original
        // constructed a second, identical ConsumerConfig and discarded the
        // first).
        ConsumerConfig config = new ConsumerConfig(PropertiesUtil.props);
        ConsumerConnector consumer = Consumer.createJavaConsumerConnector(config);

        // Bind the topic with a single consuming stream.
        String topic = PropertiesUtil.getPorp("topic");
        Map<String, Integer> map = new HashMap<String, Integer>();
        map.put(topic, new Integer(1));

        // Consume: iterate the stream forever, writing each raw message
        // string into hbase.
        Map<String, List<KafkaStream<byte[], byte[]>>> kafkaMsg = consumer.createMessageStreams(map);
        List<KafkaStream<byte[], byte[]>> msgList = kafkaMsg.get(topic);
        for (KafkaStream<byte[], byte[]> msg : msgList) {
            ConsumerIterator<byte[], byte[]> it = msg.iterator();
            while (it.hasNext()) {
                MessageAndMetadata<byte[], byte[]> next = it.next();
                String record = new String(next.message());
                dao.put(record);
            }
        }
    }
}
8. Write Monitor Controller in ssm module --------------------------------------
package com.it18zhang.ssm.web.controller;

import com.alibaba.fastjson.JSON;
import com.it18zhang.ssm.domain.HeartBeat;
import com.ssm.monitor.MonitorService;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;

import javax.annotation.Resource;
import java.util.List;

/**
 * Web endpoints for the process-heartbeat monitor.
 */
@Controller
public class MonitorController {

    @Resource(name = "monitorService")
    private MonitorService ms;

    /** Renders the monitoring page. */
    @RequestMapping("/monitor/toMonitorPage")
    public String toMonitorPage() {
        return "monitor/monitorPage";
    }

    /**
     * Returns the latest heartbeat of every monitored process as JSON.
     *
     * @ResponseBody is required here: without it Spring MVC treats the
     * returned JSON string as a VIEW NAME and tries to resolve a JSP for it
     * instead of writing it to the response body. The original also declared
     * an unused @RequestParam("heartbeat") HeartBeat parameter, which
     * @RequestParam cannot bind (it binds simple values, not POJOs) and which
     * the page's $.getJSON call never sends — it has been removed.
     */
    @RequestMapping("/json/monitor/getMonitorInfo")
    @ResponseBody
    public String getMonitorInfo() {
        List<HeartBeat> list = ms.getHeartBeats();
        return JSON.toJSONString(list);
    }
}
9. Write JSP interface monitor/monitorPage.jsp
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %>
<html>
<head>
    <title>Call record</title>
    <link rel="stylesheet" type="text/css" href="../css/my.css">
    <script type="text/javascript" src="../js/jquery-3.2.0.min.js"></script>
    <script type="text/javascript">
        // Poll the monitor endpoint and dump the latest heartbeat JSON into
        // the display area, replacing the previous contents each time.
        function pollHeartbeats() {
            $("#div1").empty();
            $.getJSON("/json/monitor/getMonitorInfo", function (data) {
                $("#div1").append(data);
            });
        }

        // Refresh once a second after the DOM is ready.
        $(function () {
            setInterval(pollHeartbeats, 1000);
        });
    </script>
</head>
<body>
<div id="div1" style="border:1px solid blue;width: 400px ; height: 300px">
</div>
</body>
</html>
10. Package the generated data program and the consumer program, throw them into ubuntu and run them. Open the ssm page for testing.