0


头歌大数据答案(自用)

第一关

# 命令行
start-all.sh
nohup hive --service metastore &
importorg.apache.spark.sql.SparkSessionimportorg.apache.spark.sql.functions.colimportorg.apache.spark.sql.functions._
object cleandata {
  def main(args:Array[String]):Unit={//创建spark对象
    val spark =SparkSession.builder().appName("HiveSupport").master("local[*]").config("spark.sql.warehouse.dir","hdfs://127.0.0.1:9000/opt/hive/warehouse").config("hive.metastore.uris","thrift://127.0.0.1:9083").config("dfs.client.use.datanode.hostname","true").enableHiveSupport().getOrCreate()//############# Begin ############//创建hive数据库daobidata
    spark.sql("create database daobidata")//创建hive数据表
    spark.sql("use daobidata")//创建diedata表
    spark.sql("create table if not exists diedata(bianh int,com_name string,"+"com_addr string,cat string,se_cat string,com_des string,born_data string,"+"death_data string,live_days int,financing string,total_money int,death_reason string,"+"invest_name string,ceo_name string,ceo_des string"+")row format delimited fields terminated by ',';")//将本地datadie.csv文件导入至hive数据库diedata表中
    spark.sql("load data local inpath '/data/workspace/myshixun/data/datadie.csv' into table diedata;")//进入diedata表进行清洗操作,删除为空的数据,根据倒闭原因切分出最主要原因,根据成立时间切分出,企业成立的年份,根据倒闭时间切分出,企业倒闭的年份
    val c1 = spark.table("diedata").na.drop("any").distinct().withColumn("death_reason",split(col("death_reason")," ")(0)).withColumn("bornyear",split(col("born_data"),"/")(0)).withColumn("deathyear",split(col("death_data"),"/")(0))
      
    c1.createOrReplaceTempView("c1")//创建die_data表
    spark.sql("create table if not exists die_data(bianh int,com_name string,"+"com_addr string,cat string,se_cat string,com_des string,born_data string,"+"death_data string,live_days int,financing string,total_money int,death_reason string,"+"invest_name string,ceo_name string,ceo_des string,bornyear string,deathyear string"+")row format delimited fields terminated by ',';")//将清洗完的数据导入至die_data表中
    spark.sql("insert overwrite table die_data select * from c1")//############# End ##############
    spark.stop()}}

第二关

importorg.apache.spark.sql.{SaveMode,SparkSession}
object citydiedata {
  def main(args:Array[String]):Unit={
    val spark =SparkSession.builder().appName("SparkCleanJob").master("local[*]").getOrCreate()//************* Begin **************//读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter",",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")//使用spark SQL语句,根据城市统计企业倒闭top5
    val df=spark.sql("select df1.com_addr as com_addr,count(df1.com_addr) as saddr from df1 group by df1.com_addr order by saddr desc limit 5").repartition(1).write
      //连接数据库.format("jdbc").option("url","jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8").option("driver","com.mysql.jdbc.Driver")//保存至数据库的数据表名.option("dbtable","addr")//用户名.option("user","root")//连接数据库的密码.option("password","123123")//不破坏数据表结构,在后添加.mode(SaveMode.Append).save()//************ End ***********
    spark.stop()}}

importorg.apache.spark.sql.{SaveMode,SparkSession}
object industrydata {
  def main(args:Array[String]):Unit={
    val spark =SparkSession.builder().appName("SparkCleanJob").master("local[*]").getOrCreate()//########## Begin ############//读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter",",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")//使用spark SQL语句,根据行业统计企业倒闭top10
    val df=spark.sql("select df1.cat as industry,count(df1.cat) as catindustry from df1 group by df1.cat order by catindustry desc limit 10 ").repartition(1).write
      //连接数据库.format("jdbc").option("url","jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8").option("driver","com.mysql.jdbc.Driver")//数据表名.option("dbtable","industry").option("user","root").option("password","123123")//不破坏数据表结构,在后添加.mode(SaveMode.Append).save()//############ End ###########
    spark.stop()}}

importorg.apache.spark.sql.{SaveMode,SparkSession}
object closedown {
  def main(args:Array[String]):Unit={
    val spark =SparkSession.builder().appName("SparkCleanJob").master("local[*]").getOrCreate()//############ Begin ###########//读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter",",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")//使用spark SQL语句,根据倒闭原因字段,找到企业倒闭的主要原因,统计主要原因的个数
    val df=spark.sql("select df1.death_reason as death_reason,count(df1.death_reason) as dreason from df1 group by df1.death_reason order by dreason desc").repartition(1).write
      //连接数据库.format("jdbc")//数据库名.option("url","jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8").option("driver","com.mysql.jdbc.Driver")//数据表名.option("dbtable","cldown").option("user","root").option("password","123123")//不破坏表结构,在后面添加.mode(SaveMode.Append).save()//############ End ###########
    spark.stop()}}

importorg.apache.spark.sql.{SaveMode,SparkSession}
object comfinanc {
  def main(args:Array[String]):Unit={
    val spark =SparkSession.builder().appName("SparkCleanJob").master("local[*]").getOrCreate()//############ Begin ###########//读取数据,用逗号分隔,去除表头,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter",",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")//使用spark SQL语句,根据行业细分领域字段,统计企业倒闭分布情况top20
    val df=spark.sql("select df1.se_cat as se_cat,count(df1.se_cat) as countsecat from df1 group by df1.se_cat order by countsecat desc limit 10").repartition(1).write
      //连接数据库.format("jdbc").option("url","jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8").option("driver","com.mysql.jdbc.Driver")//数据表名.option("dbtable","secat").option("user","root").option("password","123123")//不破坏表结构,在后面添加.mode(SaveMode.Append).save()//使用spark SQL语句,统计倒闭企业融资情况
    val d1=spark.sql("select df1.financing as financing,count(df1.financing) as countfinanc from df1 group by df1.financing order by countfinanc desc").repartition(1).write
      //连接数据库.format("jdbc").option("url","jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8").option("driver","com.mysql.jdbc.Driver")//数据表名.option("dbtable","financing").option("user","root").option("password","123123")//不破坏表结构,在后面添加.mode(SaveMode.Append).save()//########## End #########
    spark.stop()}}

importorg.apache.spark.sql.{SaveMode,SparkSession}
object yeardata {
  def main(args:Array[String]):Unit={
    val spark =SparkSession.builder().appName("SparkCleanJob").master("local[*]").getOrCreate()//############ Begin ###########//读取数据,用逗号分隔,第一行不做为数据,做为标题
    val df1 = spark.read.option("delimiter",",").option("header",true).csv("/data/workspace/myshixun/die_data.csv")
    df1.createOrReplaceTempView("df1")//根据企业成立时间字段,统计每年有多少成立的企业
    val d1=spark.sql("select df1.bornyear as bornyear,count(df1.bornyear) as byear from df1 group by df1.bornyear order by bornyear desc limit 10").repartition(1).write
        //连接数据库.format("jdbc").option("url","jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8").option("driver","com.mysql.jdbc.Driver")//数据表名.option("dbtable","bornyear").option("user","root").option("password","123123")//不破坏表结构,在后面添加.mode(SaveMode.Append).save()//根据企业倒闭年份字段,统计企业每个年份倒闭的数量
    val d2=spark.sql("select df1.deathyear as deathyear,count(df1.deathyear) as dyear from df1 group by df1.deathyear order by deathyear desc limit 10").repartition(1).write
          //连接数据库.format("jdbc")//数据库名.option("url","jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8").option("driver","com.mysql.jdbc.Driver")//数据表名.option("dbtable","deathyear").option("user","root").option("password","123123")//不破坏表结构,在后面添加.mode(SaveMode.Append).save()//############# End ############
    spark.stop()}}

第三关

from app import db
classdiedata(db.Model):
    __tablename__ ="addr"#**************** Begin ************#
    ID = db.Column(db.Integer, primary_key=True)##序号 主键
    com_addr = db.Column(db.String(255))##城市
    saddr = db.Column(db.Integer)##统计企业倒闭数量#************* End *************#
from flask importrender_template
from app.views importindex
from app importdb
from app.model.models [email protected]("/city")
def index1():
    selectdata = db.session.query(diedata.com_addr).all()
    selectdata1 = db.session.query(diedata.saddr).all()
    list1 =[]
    list2=[]
    #**********Begin**********#
    #获取城市倒闭企业top5的数据
    for k in selectdata:
        data ={"com_addr": k.com_addr,}
        list1.append(data)for i in selectdata1:
        list2.append(i[0])returnrender_template("test3.html", com_addr=list1, saddr=list2)
    #***********End***********#
<!DOCTYPEhtml><htmllang="en"><head><metacharset="UTF-8"><title>城市倒闭企业统计情况</title><scripttype="text/javascript"src="../static/js/echarts-all-3.js"></script></head><body><!--准备一个DOM容器--><divid="main"style="width: 1500px;height: 650px;"></div><script>var myChart = echarts.init(document.getElementById('main'));//*********** Begin ***************
    com_addr=[]{%for a in com_addr %}
            com_addr.push('{{ a.com_addr }}');{% endfor %}var saddr={{saddr|tojson}};
    option ={title:{text:'城市倒闭企业top5展示图',left:'center'},legend:{data:['城市倒闭企业个数'],//这里设置柱状图上面的方块,名称跟series里的name保持一致align:'right',//图例显示的位置:靠左,靠右还是居中的设置.不设置则居中right:10,},xAxis:{type:'category',data: com_addr
      },yAxis:{type:'value',name:'倒闭个数',axisLabel:{formatter:'{value} 个'}},series:[{data: saddr,type:'bar',name:'城市倒闭企业个数',itemStyle:{normal:{color:'blue',lineStyle:{color:'blue'},label:{show:true}}}}]};
    myChart.setOption(option);//************ End ***************</script></body></html>

from app import db
classdiedata(db.Model):
    __tablename__ ="industrydata"#************* Begin ************
    
    ID = db.Column(db.Integer, primary_key=True)##序号 主键
    industry = db.Column(db.String(255))##行业名
    catindustry = db.Column(db.Integer)##行业倒闭数#************* End ************
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
@index.route("/industry")defindex1():#************* Begin ************
    selectdata = db.session.query(diedata.industry).all()
    selectdata1 = db.session.query(diedata.catindustry).all()
    list1 =[]
    list2=[]for k in selectdata:
        data ={"industry": k.industry,}
        list1.append(data)for i in selectdata1:
        list2.append(i[0])return render_template("test3.html", industry=list1, catindustry=list2)#************* End *************
<!DOCTYPEhtml><htmllang="en"><head><metacharset="UTF-8"><title>城市倒闭企业统计情况</title><scripttype="text/javascript"src="../static/js/echarts-all-3.js"></script></head><body><!--准备一个DOM容器--><divid="main"style="width: 1500px;height: 650px;"></div><script>var myChart = echarts.init(document.getElementById('main'));//************* Begin ************
    industry=[]{%for a in industry %}
            industry.push('{{ a.industry }}');{% endfor %}var catindustry={{catindustry|tojson}};
    option ={title:{text:'行业企业倒闭top10折线图',left:'center'},legend:{data:['行业企业倒闭数'],//这里设置柱状图上面的方块,名称跟series里的name保持一致align:'right',//图例显示的位置:靠左,靠右还是居中的设置.不设置则居中right:10,},xAxis:{type:'category',name:'行业分类',axisLabel:{formatter:'{value}'},data: industry
          },yAxis:{type:'value',name:'行业企业倒闭数',axisLabel:{formatter:'{value} 个'}},series:[{name:'行业企业倒闭数',data: catindustry,type:'line',smooth:true,label:{show:true},itemStyle:{normal:{color:'green',lineStyle:{color:'green'},label:{show:true}}}}]};
    myChart.setOption(option);//************* End ************</script></body></html>

from app import db
classdiedata(db.Model):
    __tablename__ ="closedown"############ Begin ###########
    ID = db.Column(db.Integer, primary_key=True)##序号 主键
    death_reason = db.Column(db.String(255))##倒闭原因
    dreason = db.Column(db.Integer)##倒闭原因统计############ End ###########
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
@index.route("/deathreason")defindex1():
    selectdata = db.session.query(diedata.death_reason,diedata.dreason).all()
    list1 =[]############# Begin ############for k in selectdata:
        data ={"name": k.death_reason,"value":k.dreason
        }
        list1.append(data)return render_template("test3.html", datas=list1)############# End ############
<!DOCTYPEhtml><htmllang="en"><head><metacharset="UTF-8"><title>企业倒闭的原因</title><scripttype="text/javascript"src="../static/js/echarts-all-3.js"></script></head><body><!--准备一个DOM容器--><divid="main"style="width: 1500px;height: 650px;"></div><script>var myChart = echarts.init(document.getElementById('main'));//########### Begin #############var datas={{datas|tojson}};
        option ={title:{text:'企业倒闭原因结果统计图',left:'center'},legend:{top:'bottom',data:datas
              },tooltip:{trigger:'item',formatter:'{b} : {c} ({d}%)'},toolbox:{show:true},series:[{type:'pie',radius:[50,250],center:['50%','50%'],roseType:'area',itemStyle:{borderRadius:8},data:datas
                }]};
    myChart.setOption(option);//########### End #############</script></body></html>

from app import db
classdiedata(db.Model):
    __tablename__ ="secat"############## Begin ###########
    ID = db.Column(db.Integer, primary_key=True)##序号 主键
    se_cat = db.Column(db.String(255))##细分领域
    countsecat = db.Column(db.Integer)##细分领域企业倒闭数############## End ############classdiedata1(db.Model):
    __tablename__ ="financing"############## Begin ###########
    ID = db.Column(db.Integer, primary_key=True)##序号 主键
    financing = db.Column(db.String(255))##融资名
    countfinanc = db.Column(db.Integer)##融资个数############## End ############
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
from app.model.models import diedata1
@index.route("/fincat")defindex1():
    selectdata = db.session.query(diedata.se_cat).all()
    selectdata1 =db.session.query(diedata.countsecat).all()
    selectdata2=db.session.query(diedata1.financing).all()
    selectdata3=db.session.query(diedata1.countfinanc).all()
    list1 =[]
    list2 =[]
    list3 =[]
    list4 =[]############## Begin ###########for i in selectdata:
        data ={"se_cat": i.se_cat,}
        list1.append(data)for j in selectdata1:
        list2.append(j[0])for x in selectdata2:
        data ={"financing": x.financing,}
        list3.append(data)for y in selectdata3:
        list4.append(y[0])return render_template("test3.html", se_cat=list1,countsecat=list2,financing=list3,countfinanc=list4)############## End ###########
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>企业融资以及细分领域倒闭企业数据</title><script type="text/javascript" src="../static/js/echarts-all-3.js"></script></head><body><!--准备一个DOM容器--><div id="main" style="width: 1500px;height: 650px;"></div><script>var myChart = echarts.init(document.getElementById('main'));//############## Begin ###########
    se_cat=[]{%for a in se_cat %}
            se_cat.push('{{ a.se_cat }}');{% endfor %}var countsecat={{countsecat|tojson}};
        financing=[]{%for b in financing %}
            financing.push('{{ b.financing }}');{% endfor %}var countfinanc={{countfinanc|tojson}};
        option ={
          title:[{
              left:'center',
              text: '细分领域企业倒闭数'
            },{
              top:'55%',
              left:'center',
              text:'企业融资情况'}],
          tooltip:{
            trigger:'axis'},
          legend:{
                  data:['细分领域','融资'],
                  left:10},
          xAxis:[{
              data: se_cat
            },{
              data: financing,
              gridIndex:1}],
          yAxis:[{},{
              gridIndex:1}],
          grid:[{
              bottom:'60%'},{
              top:'60%'}],
          series:[{
              name:'细分领域',
              type:'bar',
              showSymbol:true,
              data: countsecat,
              label:{
                    show:true},
              itemStyle:{
                        normal:{
                        color:'red',
                        lineStyle:{
                            color:'red'},
                        label :{show:true}}}},{
              name:'融资',
              type:'line',
              showSymbol:true,
              data: countfinanc,
              xAxisIndex:1,
              yAxisIndex:1,
              label:{
                    show:true},
              itemStyle:{
                        normal:{
                        color:'green',
                        lineStyle:{
                            color:'green'},
                        label :{show:true}}}}]};
    myChart.setOption(option);//############## End ###########</script></body></html>

from app import db
classdiedata(db.Model):
    __tablename__ ="bornyear"########### Begin ##########
    ID = db.Column(db.Integer, primary_key=True)##序号 主键
    bornyear = db.Column(db.String(255))##成立年份
    byear = db.Column(db.Integer)##计数########### End ##########classdiedata1(db.Model):
    __tablename__ ="deathyear"########### Begin ##########
    ID = db.Column(db.Integer, primary_key=True)##序号 主键
    deathyear = db.Column(db.String(255))##倒闭年份
    dyear = db.Column(db.Integer)##计数########### End ##########
from flask import render_template
from app.views import index
from app import db
from app.model.models import diedata
from app.model.models import diedata1
@index.route("/ydata")defindex1():########### Begin ##########
    selectdata = db.session.query(diedata.bornyear,diedata.byear).all()
    selectdata1 =db.session.query(diedata1.deathyear,diedata1.dyear).all()
    list1 =[]
    list2 =[]
    list3 =[]
    list4 =[]for x in selectdata:
        list1.append(str(x[0])+'年')
        list2.append(x[1])for j in selectdata1:
        list3.append(str(j[0])+'年')
        list4.append(j[1])############ End ############return render_template("test3.html", bornyear=list1,byear=list2,deathyear=list3,dyear=list4)
<!DOCTYPEhtml><htmllang="en"><head><metacharset="UTF-8"><title>企业成立年份和倒闭年份</title><scripttype="text/javascript"src="../static/js/echarts-all-3.js"></script></head><body><!--准备一个DOM容器--><divid="main"style="width: 1500px;height: 650px;"></div><script>//########### Begin ###########var myChart = echarts.init(document.getElementById('main'));var bornyear={{bornyear|tojson}};var byear={{byear|tojson}};var deathyear={{deathyear|tojson}};var dyear={{dyear|tojson}};
        option ={title:[{left:'center',text:'企业成立年份柱状图'},{top:'55%',left:'center',text:'企业倒闭年份柱状图'}],tooltip:{trigger:'axis'},legend:{data:['成立年份','倒闭年份'],left:10},xAxis:[{data: bornyear
            },{data: deathyear,gridIndex:1}],yAxis:[{},{gridIndex:1}],grid:[{bottom:'60%'},{top:'60%'}],series:[{name:'成立年份',type:'bar',showSymbol:true,data: byear,label:{show:true},itemStyle:{normal:{color:'red',lineStyle:{color:'red'},label:{show:true}}}},{name:'倒闭年份',type:'bar',showSymbol:true,data: dyear,xAxisIndex:1,yAxisIndex:1,label:{show:true},itemStyle:{normal:{color:'green',lineStyle:{color:'green'},label:{show:true}}}}]};
    myChart.setOption(option);//########### End ###########</script></body></html>
标签: 大数据

本文转载自: https://blog.csdn.net/qq_62678419/article/details/139772725
版权归原作者 乐正龙牙lox 所有, 如有侵权,请联系我们删除。

“头歌大数据答案(自用)”的评论:

还没有评论