• scala实现wordcount方法-商品标签统计-气温统计


    scala实现单词统计
    ---------------------
        import scala.io.Source
    
        /**
          * Word-count demo: reads a text file, counts how often every
          * whitespace-separated word occurs and prints one `(word,count)` pair
          * per line.
          */
        object WCApp {
            /** Input file; hard-coded because this is a stand-alone demo. */
            private val InputPath = "d:/mr/word.txt"

            /**
              * Entry point.
              *
              * @param args command-line arguments (unused)
              */
            def main(args: Array[String]): Unit = {
                // 1. Load the file, materialise all lines, and always release the handle.
                val src = Source.fromFile(InputPath)
                val lines =
                    try src.getLines().toList
                    finally src.close()

                // 2. Flatten the lines into words. Splitting on runs of whitespace and
                //    dropping empty tokens keeps blank lines and repeated spaces from
                //    being counted as the word "".
                val words = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty)

                // 3. Group identical words and take the size of each group.
                val counts = words
                    .groupBy(identity)
                    .map { case (word, occurrences) => (word, occurrences.size) }

                // 4. Print the computed counts (not the raw input lines).
                counts.foreach(println)
            }

        }
    
    
    
    scala实现单词统计2
    ---------------------
        import scala.io.Source
    
        /**
          * Word-count demo, second variant: instead of taking the size of each
          * group, the `(word, 1)` pairs of a group are merged pairwise with
          * `reduce`, summing their counts.
          */
        object WCApp2 {
            /**
              * Entry point.
              *
              * @param args command-line arguments (unused)
              */
            def main(args: Array[String]): Unit = {
                // 1. Load the file, materialise all lines, and always release the handle.
                val src = Source.fromFile("d:/mr/word.txt")
                val lines =
                    try src.getLines().toList
                    finally src.close()

                // 2. Flatten into words and tag each one with a count of 1. Empty tokens
                //    (blank lines, repeated spaces) are dropped so "" is not counted.
                val pairs = lines
                    .flatMap(_.split("\\s+"))
                    .filter(_.nonEmpty)
                    .map(word => (word, 1))

                // 3. Group by word: {hello -> [(hello,1), (hello,1), (hello,1)]}
                val grouped = pairs.groupBy(_._1)

                // 4. Collapse every group into a single (word, total) pair.
                //    groupBy never yields an empty group, so reduce cannot fail here.
                val totals = grouped.map { case (_, group) =>
                    group.reduce((a, b) => (a._1, a._2 + b._2))
                }

                totals.foreach(println)
            }

        }
    
    
    
    
    Bitmap实现topn统计
    ------------------------
        import scala.io.Source
    
        /**
          * Per-year top-N temperature query backed by a bitmap.
          *
          * Every non-negative temperature of a year sets one bit in a byte array
          * (bit `t % 8` of byte `t / 8`). Scanning the bits from the highest index
          * downwards then yields the N highest distinct temperatures of that year.
          * Negative temperatures are ignored; a temperature of 1024 or more lies
          * outside the 128-byte bitmap and raises ArrayIndexOutOfBoundsException.
          */
        object TempTopN2_Bitmap {
            /** How many of the highest temperatures are reported per year. */
            private val TopN = 3

            /** Bitmap size in bytes; 128 bytes cover the temperatures 0 to 1023. */
            private val BitmapBytes = 128

            /**
              * Entry point: prints one `(year,t1,t2,t3)` line per year, ordered by year.
              *
              * @param args command-line arguments (unused)
              */
            def main(args: Array[String]): Unit = {
                // 1. Load the temperature file and always release the handle.
                val f = Source.fromFile("d:/mr/temp.dat")
                val temps =
                    try f.getLines().toList
                    finally f.close()

                // 2. Parse every "year temp" line into a tuple, e.g. (1900, 28).
                //    Blank lines are skipped so a trailing newline does not crash toInt.
                val readings = temps.filter(_.trim.nonEmpty).map { line =>
                    val fields = line.split(" ")
                    (fields(0).toInt, fields(1).toInt)
                }

                // 3. Group by year and reduce each group to its top temperatures.
                val topByYear = readings.groupBy(_._1).map { case (year, group) =>
                    (year, topTemps(toBitmap(group.map(_._2)), TopN).mkString(","))
                }

                // 4. Print ordered by year.
                topByYear.toList.sortBy(_._1).foreach(println(_))
            }

            /** Sets one bit per non-negative temperature; 0 is a valid reading and is kept. */
            private def toBitmap(temps: List[Int]): Array[Byte] = {
                val bitmap = new Array[Byte](BitmapBytes)
                for (temp <- temps if temp >= 0) {
                    val index = temp / 8
                    bitmap(index) = (bitmap(index) | (1 << (temp % 8))).toByte
                }
                bitmap
            }

            /** Returns up to `n` set bit positions of the bitmap, highest first. */
            private def topTemps(bitmap: Array[Byte], n: Int): List[Int] = {
                // Lazy iterators stop the scan as soon as n hits are found,
                // which replaces the early `return` from inside nested loops.
                val setBits = for {
                    byteIndex <- (bitmap.length - 1 to 0 by -1).iterator
                    bit <- (7 to 0 by -1).iterator
                    if ((bitmap(byteIndex) >> bit) & 1) != 0
                } yield 8 * byteIndex + bit
                setBits.take(n).toList
            }
        }
    
    scala实现商品评论标签统计
    ---------------------
        1.TagUtil.java
            package com.oldboy.scala.util;
    
            import com.alibaba.fastjson.JSON;
            import com.alibaba.fastjson.JSONArray;
            import com.alibaba.fastjson.JSONObject;
    
            import java.util.ArrayList;
            import java.util.List;
    
            /**
             * Tag utility: pulls review tags out of a JSON record.
             */
            public class TagUtil {
                /**
                 * Extracts the review tags from a JSON document.
                 *
                 * Reads the "values" array of the first element of the top-level
                 * "extInfoList" array and returns its entries as strings.
                 *
                 * @param json the JSON text of one record
                 * @return the tags in document order; an empty list when the input
                 *         yields no JSON object or the expected arrays are missing or empty
                 */
                public static List<String> extractTags(String json){
                    // Collected tags; returned as-is (possibly empty) on every exit path.
                    List<String> tags = new ArrayList<String>() ;

                    // parseObject can hand back null (e.g. for null or blank input);
                    // guard so the lookups below cannot throw a NullPointerException.
                    JSONObject root = JSON.parseObject(json) ;
                    if(root == null){
                        return tags ;
                    }

                    JSONArray extInfoList = root.getJSONArray("extInfoList");
                    if(extInfoList == null || extInfoList.size() == 0){
                        return tags ;
                    }

                    // Only the first entry of extInfoList is inspected.
                    JSONObject first = extInfoList.getJSONObject(0);
                    if(first == null){
                        return tags ;
                    }

                    JSONArray values = first.getJSONArray("values") ;
                    if(values == null){
                        return tags ;
                    }

                    for(int i = 0 ; i < values.size() ; i ++){
                        tags.add(values.getString(i));
                    }
                    return tags ;
                }
            }
    
        2.TaggenDemo.scala
            import javax.swing.text.html.HTML.Tag
    
            import com.oldboy.scala.util.TagUtil
    
            import scala.io.Source
    
            /**
              * Tag statistics: for every business id, counts how often each review
              * tag occurs, keeps the five most frequent tags, and prints the
              * businesses ordered by the count of their most frequent tag (descending).
              *
              * Each input line is expected to be `busid<TAB>json`.
              */
            object TaggenDemo {

                /**
                  * Entry point: prints one `busid==>(tag,count);(tag,count);...` line
                  * per business.
                  *
                  * @param args command-line arguments (unused)
                  */
                def main(args: Array[String]): Unit = {
                    // Explicit .asScala conversion instead of the deprecated implicit JavaConversions.
                    import scala.collection.JavaConverters._

                    // 1. Load the file and always release the handle.
                    val file = Source.fromFile("d:/mr/temptags.txt")
                    val lines =
                        try file.getLines().toList
                        finally file.close()

                    // 2. Flatten every line into (busid, tag) pairs.
                    //    Blank lines are skipped so a trailing newline does not crash on fields(1).
                    val busTags = lines.filter(_.trim.nonEmpty).flatMap { line =>
                        val fields = line.split("\t")
                        val busid = fields(0)
                        TagUtil.extractTags(fields(1)).asScala.map(tag => (busid, tag))
                    }

                    // 3. Count identical pairs: {(busid, tag) -> 300}
                    val tagCounts = busTags
                        .groupBy(identity)
                        .map { case (pair, hits) => (pair, hits.size) }

                    // 4. Re-key by business: Map(busid -> List((busid, (tag, cnt)), ...))
                    val byBusiness = tagCounts.toList
                        .map { case ((busid, tag), cnt) => (busid, (tag, cnt)) }
                        .groupBy(_._1)

                    // 5. Per business keep the five most frequent tags, most frequent first.
                    val top5 = byBusiness.map { case (busid, entries) =>
                        (busid, entries.map(_._2).sortBy(entry => -entry._2).take(5))
                    }

                    // 6. Order businesses by their highest tag count, descending.
                    //    Every business has at least one tag here, so head is safe.
                    val ranked = top5.toList.sortBy { case (_, tags) => -tags.head._2 }
                    ranked.foreach { case (busid, tags) =>
                        println(busid + "==>" + tags.mkString(";"))
                    }
                }
            }
  • 相关阅读:
    在二进制与文本之间转换plist文件
    iOS 音频分贝的计算
    iOS 圆形水波浪效果实现
    iOS画圆、画线
    iOS IM开发准备工作(四)CocoaAsyncSocket的使用
    iOS IM开发准备工作(三)乱说Socket
    iOS IM开发准备工作(二)protobuf-objc安装及使用
    iOS IM开发准备工作(一)XML解析
    iOS IM开发blog写作计划
    西游记倒着看。。我从贴吧看来的
  • 原文地址:https://www.cnblogs.com/zyde/p/9004770.html
Copyright © 2020-2023  润新知