Want to know how I analyzed the Summit session data? I exported the data from the abstract selection system into a tab delimited file. Since I use a Mac at home, I used the tools available to me: PostgreSQL.
I loaded the data using the PostgreSQL COPY command to bulk load the report output and then did some data mojo using PostgreSQL. Most of it was exactly the same as it would be for SQL Server with a few differences. Here’s the code that I used:
SELECT COUNT(*) AS total_sessions
FROM abstracts;
GO
-- Yup, I used the GO statement separator. This is a byproduct
-- of the client that I use, not because of PostgreSQL.
SELECT COUNT(DISTINCT first_name || ' ' || last_name) AS name_count
FROM abstracts ;
GO
SELECT job_title,
COUNT(job_title) AS jt_count
FROM ( SELECT job_title,
first_name,
last_name
FROM abstracts
GROUP BY job_title,
first_name,
last_name
) AS x
GROUP BY job_title
ORDER BY jt_count DESC;
GO
WITH c AS (
SELECT job_title,
first_name,
last_name
FROM abstracts
GROUP BY job_title,
first_name,
last_name
)
SELECT SUM(CASE WHEN LOWER(job_title) LIKE '%engineer%' THEN 1 ELSE 0 END) AS engineer_count,
SUM(CASE WHEN LOWER(job_title) LIKE '%manager%' THEN 1 ELSE 0 END) AS manager_count,
SUM(CASE WHEN LOWER(job_title) LIKE '%developer%' THEN 1 ELSE 0 END) AS developer_count,
SUM(CASE WHEN LOWER(job_title) LIKE '%consultant%' THEN 1 ELSE 0 END) AS consultant_count,
SUM(CASE WHEN LOWER(job_title) LIKE '%business intelligence%' THEN 1
WHEN LOWER(job_title) LIKE '%b.i.%' THEN 1
WHEN LOWER(job_title) LIKE '%BI%' THEN 1
ELSE 0 END) AS bi_count,
SUM(CASE WHEN LOWER(job_title) LIKE '%architect%' THEN 1 ELSE 0 END) AS architect_count
FROM c;
GO
SELECT COUNT(DISTINCT company) AS company_count
FROM abstracts;
GO
SELECT DISTINCT
category,
track,
COUNT(track) OVER (PARTITION BY category) AS the_count_by_category,
COUNT(track) OVER (PARTITION BY category, track) AS the_count_by_track
FROM abstracts
ORDER BY category, track;
GO
SELECT AVG(author_count) AS author_average
FROM ( SELECT DISTINCT
first_name,
last_name,
COUNT(session_title) OVER (PARTITION BY first_name, last_name) AS author_count
FROM abstracts
) AS x;
GO
-- You might be weirded out by this string_to_array and array_upper.
-- PostgreSQL has an array data type. We're using an array of strings
-- to get an accurate word count
SELECT AVG(CHAR_LENGTH(abstract)) AS avg_char_count,
STDDEV(CHAR_LENGTH(abstract)) AS char_count_stddev,
AVG(array_upper(string_to_array(abstract, ' '), 1)) AS avg_word_count,
STDDEV(array_upper(string_to_array(abstract, ' '), 1)) AS word_count_stddev
FROM abstracts;
GO
-- Hey, look at that, we can use a single TRIM function to do all work
SELECT SUM(CASE WHEN TRIM(BOTH FROM presented_before) = 'YES' THEN 1 ELSE 0 END) AS presented_before,
SUM(CASE WHEN TRIM(BOTH FROM presented_before) = 'NO' THEN 1 ELSE 0 END) AS never_presented
FROM abstracts;
GO
SELECT level,
COUNT(*)
FROM abstracts
GROUP BY level
ORDER BY level;
GO