如何在 BigQuery 中构建“星级评分”报告(或迷你图、颜色渐变)

Mos*_*sky 6 google-bigquery

假设我有以下示例输入:

WITH Ratings AS (
    -- Sample fixture: one row per rated item, score is an integer rating.
    SELECT name, score
    FROM UNNEST([
        STRUCT('A' AS name, 2 AS score),
        STRUCT('B' AS name, 0 AS score),
        STRUCT('C' AS name, 5 AS score),
        STRUCT('D' AS name, 1 AS score)
    ])
)
Run Code Online (Sandbox Code Playgroud)

其中 score 是 0 到 5 之间的数字。如何生成一份显示名称和相应星数的报告?

Mos*_*sky 12

我们可以使用两个Unicode字符将星级评级建立为字符串:

★ - Unicode code point 9733
☆ - Unicode code point 9734
Run Code Online (Sandbox Code Playgroud)

我们可以使用 CODE_POINTS_TO_STRING 函数生成星形字符,并用 REPEAT 函数重复出正确数量的星星。

将样本输入的解决方案组合在一起将是:

-- Star-rating report: renders each score as a fixed-width string of five
-- stars, filled stars (U+2605, code point 9733) first, then empty stars
-- (U+2606, code point 9734).
WITH Ratings AS (
  -- Sample input; replace with the real ratings table.
  SELECT 'A' AS name, 2 AS score UNION ALL
  SELECT 'B', 0 UNION ALL
  SELECT 'C', 5 UNION ALL
  SELECT 'D', 1
)

SELECT
  name,
  CONCAT(
    -- Clamp to [0, 5]: REPEAT raises an error for a negative repetition
    -- count, which an out-of-range score would otherwise produce.
    REPEAT(CODE_POINTS_TO_STRING([9733]), LEAST(GREATEST(score, 0), 5)),
    REPEAT(CODE_POINTS_TO_STRING([9734]), 5 - LEAST(GREATEST(score, 0), 5))
  ) AS score  -- output column deliberately reuses the name "score"
FROM Ratings
-- Without an explicit ORDER BY the row order of the result is undefined.
ORDER BY name;
Run Code Online (Sandbox Code Playgroud)

它将产生以下结果:

name    score
A       ★★☆☆☆
B       ☆☆☆☆☆
C       ★★★★★
D       ★☆☆☆☆
Run Code Online (Sandbox Code Playgroud)


Fel*_*ffa 5

我的方案使用颜色渐变,因为迷你图只在某些字体下才好看,而 BigQuery Web UI 使用的并不是这类字体。

Stack Overflow 上每个标签在一天中的哪些时段最活跃:

#standardSQL
-- barchart(v, mm): renders every element of v as one glyph of a 5-step colour
-- ramp and concatenates the glyphs in array order.
--   v   values to plot
--   mm  the value range; mm.min maps to the first glyph, mm.max to the last
-- Returns NULL for an empty array.
--
-- The ramp is built from code points instead of a string literal so that it
-- survives copy/paste through tools that drop non-BMP characters. It uses the
-- five skin-tone modifier emoji U+1F3FF (darkest) .. U+1F3FB (lightest);
-- reverse the list to invert the scale.
CREATE TEMP FUNCTION barchart(v ARRAY<FLOAT64>, mm STRUCT<min FLOAT64, max FLOAT64>) AS ((
  SELECT
    STRING_AGG(
      SUBSTR(
        CODE_POINTS_TO_STRING([127999, 127998, 127997, 127996, 127995]),
        1 + CAST(ROUND(level) AS INT64),  -- SUBSTR positions are 1-based
        1),
      ''
      -- UNNEST gives no ordering guarantee, so order the glyphs explicitly.
      ORDER BY pos)
  FROM (
    SELECT
      pos,
      -- Scale into 0..4. SAFE_DIVIDE yields NULL when max = min (or the
      -- element is NULL); those cases fall back to the first glyph.
      IFNULL(SAFE_DIVIDE(e - mm.min, mm.max - mm.min) * 4, 0) AS level
    FROM UNNEST(v) AS e WITH OFFSET AS pos
  )
));

-- vbar(v): barchart scaled to the minimum and maximum of v itself.
CREATE TEMP FUNCTION vbar(v ARRAY<FLOAT64>) AS (
  barchart(v, (SELECT AS STRUCT MIN(a), MAX(a) FROM UNNEST(v) AS a))
);


-- For each of the 24 most frequent Stack Overflow tags (questions created in
-- 2016 or later): one row with a per-hour activity gradient and the total
-- (approximate) number of questions counted.
WITH top_tags AS (
  -- The 24 most frequent tags; `tags` is a '|'-separated list.
  SELECT top_tag.value AS tag
  FROM (
    SELECT APPROX_TOP_COUNT(tag, 24) AS tag_counts
    FROM `bigquery-public-data.stackoverflow.posts_questions`
    CROSS JOIN UNNEST(SPLIT(tags, '|')) AS tag
    WHERE EXTRACT(YEAR FROM creation_date) >= 2016
  )
  CROSS JOIN UNNEST(tag_counts) AS top_tag
),

hourly_counts AS (
  -- Per tag: array of (value = hour of day, count = questions in that hour).
  SELECT
    tag,
    APPROX_TOP_COUNT(EXTRACT(HOUR FROM creation_date), 24) AS hour_counts
  FROM `bigquery-public-data.stackoverflow.posts_questions`
  CROSS JOIN UNNEST(SPLIT(tags, '|')) AS tag
  WHERE tag IN (SELECT top_tags.tag FROM top_tags)
    AND EXTRACT(YEAR FROM creation_date) >= 2016
  GROUP BY tag
)

SELECT
  tag,
  -- Counts are passed in hour order so glyph N of the gradient is hour N.
  vbar(ARRAY_AGG(1.0 * hhh.count ORDER BY hhh.value)) AS gradient,
  SUM(hhh.count) AS c
FROM hourly_counts
CROSS JOIN UNNEST(hour_counts) AS hhh
GROUP BY tag
ORDER BY
  -- Sort by the busiest hour of each tag (earliest hour on ties). This is
  -- derived from the data rather than from a glyph inside `gradient`: the
  -- previous key, STRPOS(gradient, ''), searched for an empty string and
  -- therefore did not order the rows by anything meaningful.
  ARRAY_AGG(hhh.value ORDER BY hhh.count DESC, hhh.value LIMIT 1)[OFFSET(0)],
  tag;



Row gradient                                                c       tag  
1       317538  android  
2       59445   asp.net  
3       159134  ios  
4       111988  angularjs    
5       212843  jquery   
6       138143  mysql    
7       107586  swift    
8       318294  php  
9       84723   json     
10      233100  html     
11      390245  java     
12      83787   angular  
13      70150   sql-server   
14      534663  javascript   
15      291541  c#   
16      65668   c    
17      111792  sql  
18      158999  css  
19      88146   arrays   
20      61840   ruby-on-rails    
21      136265  c++  
22      104218  node.js  
23      360396  python   
24      98690   r   
Run Code Online (Sandbox Code Playgroud)

在此输入图像描述

更简洁的阴影渐变,但只有3个值:

#standardSQL
-- barchart(v, mm): renders every element of v as one glyph of a 3-step shade
-- ramp and concatenates the glyphs in array order.
--   v   values to plot
--   mm  the value range; mm.min maps to the first glyph, mm.max to the last
-- Returns NULL for an empty array.
--
-- The ramp is built from code points so it cannot be mangled into '?' by an
-- encoding round trip. It uses the block-shade characters U+2593 (dark),
-- U+2592 (medium), U+2591 (light); reverse the list to invert the scale.
CREATE TEMP FUNCTION barchart(v ARRAY<FLOAT64>, mm STRUCT<min FLOAT64, max FLOAT64>) AS ((
  SELECT
    STRING_AGG(
      SUBSTR(
        CODE_POINTS_TO_STRING([9619, 9618, 9617]),
        1 + CAST(ROUND(level) AS INT64),  -- SUBSTR positions are 1-based
        1),
      ''
      -- UNNEST gives no ordering guarantee, so order the glyphs explicitly.
      ORDER BY pos)
  FROM (
    SELECT
      pos,
      -- Scale into 0..2. SAFE_DIVIDE yields NULL when max = min (or the
      -- element is NULL); those cases fall back to the first glyph.
      IFNULL(SAFE_DIVIDE(e - mm.min, mm.max - mm.min) * 2, 0) AS level
    FROM UNNEST(v) AS e WITH OFFSET AS pos
  )
));

-- vbar(v): barchart scaled to the minimum and maximum of v itself.
CREATE TEMP FUNCTION vbar(v ARRAY<FLOAT64>) AS (
  barchart(v, (SELECT AS STRUCT MIN(a), MAX(a) FROM UNNEST(v) AS a))
);



-- For each of the most common user countries: one row with a per-hour
-- gradient of GitHub WatchEvent activity in 2017 and the total (approximate)
-- number of events counted.
WITH top_countries AS (
  -- The 12 most frequent country_code values among users. APPROX_TOP_COUNT
  -- also counts NULL; a NULL entry never matches the IN filter below.
  SELECT top_country.value AS country_code
  FROM (
    SELECT APPROX_TOP_COUNT(country_code, 12) AS country_counts
    FROM `ghtorrent-bq.ght_2017_09_01.users`
  )
  CROSS JOIN UNNEST(country_counts) AS top_country
),

heavy_watchers AS (
  -- The 1000 logins with the most WatchEvents; they are excluded below.
  -- NULL is filtered out because `x NOT IN (..., NULL)` is never TRUE and
  -- would silently remove every row.
  SELECT heavy_watcher.value AS login
  FROM (
    SELECT APPROX_TOP_COUNT(actor.login, 1000) AS login_counts
    FROM `githubarchive.year.2017`
    WHERE type = 'WatchEvent'
  )
  CROSS JOIN UNNEST(login_counts) AS heavy_watcher
  WHERE heavy_watcher.value IS NOT NULL
),

hourly_counts AS (
  -- Per country: array of (value = hour of day, count = events in that hour).
  SELECT
    b.country_code,
    APPROX_TOP_COUNT(EXTRACT(HOUR FROM a.created_at), 24) AS hour_counts
  FROM `githubarchive.year.2017` AS a
  INNER JOIN `ghtorrent-bq.ght_2017_09_01.users` AS b
    ON a.actor.login = b.login
  WHERE b.country_code IN (SELECT top_countries.country_code FROM top_countries)
    AND a.actor.login NOT IN (SELECT heavy_watchers.login FROM heavy_watchers)
    AND a.type = 'WatchEvent'
  GROUP BY b.country_code
)

SELECT
  -- Counts are passed in hour order so glyph N of the gradient is hour N.
  vbar(ARRAY_AGG(1.0 * hhh.count ORDER BY hhh.value)) AS gradient,
  SUM(hhh.count) AS c,
  country_code
FROM hourly_counts
CROSS JOIN UNNEST(hour_counts) AS hhh
GROUP BY country_code
ORDER BY
  -- Sort by the busiest hour of each country (earliest hour on ties). This is
  -- derived from the data rather than from a glyph inside `gradient`: the
  -- previous key searched for a literal '?', a leftover of a lost character.
  ARRAY_AGG(hhh.value ORDER BY hhh.count DESC, hhh.value LIMIT 1)[OFFSET(0)],
  country_code;

Row gradient                    c       country_code     
1   ????????????????????????    204023  au   
2   ????????????????????????    293589  jp   
3   ????????????????????????    2125724 cn   
4   ????????????????????????    447092  in   
5   ????????????????????????    381510  ru   
6   ????????????????????????    545906  de   
7   ????????????????????????    395949  fr   
8   ????????????????????????    491068  gb   
9   ????????????????????????    419608  br   
10  ????????????????????????    2443381 us   
11  ????????????????????????    294793  ca   
Run Code Online (Sandbox Code Playgroud)

迷你图(sparkline)的简短代码——适用于 Data Studio:

#standardSQL
-- barchart(v, mm): renders every element of v as one glyph of an 8-step
-- sparkline and concatenates the glyphs in array order.
--   v   values to plot
--   mm  the value range; mm.min maps to the lowest bar, mm.max to the full bar
-- Returns NULL for an empty array.
--
-- The bars are built from code points so they cannot be mangled into '?' by
-- an encoding round trip: U+2581 (lower one-eighth block) .. U+2588 (full
-- block).
CREATE TEMP FUNCTION barchart(v ARRAY<FLOAT64>, mm STRUCT<min FLOAT64, max FLOAT64>) AS ((
  SELECT
    STRING_AGG(
      SUBSTR(
        CODE_POINTS_TO_STRING([9601, 9602, 9603, 9604, 9605, 9606, 9607, 9608]),
        1 + CAST(ROUND(level) AS INT64),  -- SUBSTR positions are 1-based
        1),
      ''
      -- UNNEST gives no ordering guarantee, so order the glyphs explicitly.
      ORDER BY pos)
  FROM (
    SELECT
      pos,
      -- Scale into 0..7. SAFE_DIVIDE yields NULL when max = min (or the
      -- element is NULL); those cases fall back to the lowest bar.
      IFNULL(SAFE_DIVIDE(e - mm.min, mm.max - mm.min) * 7, 0) AS level
    FROM UNNEST(v) AS e WITH OFFSET AS pos
  )
));

-- vbar(v): barchart scaled to the minimum and maximum of v itself.
CREATE TEMP FUNCTION vbar(v ARRAY<FLOAT64>) AS (
  barchart(v, (SELECT AS STRUCT MIN(a), MAX(a) FROM UNNEST(v) AS a))
);
Run Code Online (Sandbox Code Playgroud)

  • 忘了说 - 喜欢它!:O) (2认同)