Constructing Hospital Score Using PCA

1. EDA

reading csv file

merging plots

hpdata <- inner_join(table_1, table_2, by='hpid')

selecting variables

hpdata <- hpdata %>%
  select(dutyName.x, starts_with('h'), starts_with('mk'))%>%
  select(-hv1, -hv12, -hvidate)
glimpse(hpdata)
## Rows: 313
## Columns: 36
## $ dutyName.x <chr> "의료법인강릉동인병원", "강릉아산병원", "강원도삼척의료원", "강원도속초의료원", "의료법인보광의...
## $ hpid       <chr> "A2200005", "A2200008", "A2200007", "A2200012", "A220004...
## $ hv10       <chr> "0", "Y", "0", "0", "0", "0", "0", "Y", "Y", "Y", "0", "...
## $ hv11       <chr> "0", "Y", "Y", "0", "0", "0", "0", "Y", "Y", "Y", "0", "...
## $ hv2        <dbl> 6, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0,...
## $ hv3        <dbl> 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 5, 10, 4, 0, 0...
## $ hv4        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hv5        <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "...
## $ hv6        <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hv7        <chr> "Y", "Y", "0", "0", "0", "0", "0", "Y", "Y", "Y", "0", "...
## $ hv8        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hv9        <dbl> 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hvamyn     <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "...
## $ hvangioayn <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "...
## $ hvcc       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hvccc      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hvctayn    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "...
## $ hvec       <dbl> 17, 14, 18, 19, 8, 4, 8, 15, 14, 16, 9, 10, 10, 10, 13, ...
## $ hvgc       <dbl> 64, 62, 15, 59, 37, 148, 90, 91, 143, 42, 73, 19, 30, 10...
## $ hvicc      <dbl> 1, 0, 1, 3, 6, 3, 0, 0, 0, 0, 11, 1, 0, 6, 0, 0, 0, 4, 4...
## $ hvmriayn   <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "...
## $ hvncc      <dbl> 0, 6, 0, 0, 0, 0, 0, 17, 10, 0, 0, 0, 0, 3, 13, 4, -1, 0...
## $ hvoc       <dbl> 5, 13, 2, 3, 4, 3, 6, 15, 11, 9, 2, 2, 0, 8, 10, 2, 13, ...
## $ hvventiayn <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "...
## $ mkioskty25 <chr> "Y", "Y", "N", "Y", "Y", "N", "Y", "Y", "Y", "Y", "Y", "...
## $ mkioskty1  <chr> "N", "Y", "Y", "N", "N", "N", "N", "Y", "Y", "N", "N", "...
## $ mkioskty2  <chr> "N", "N", "N", "N", "N", "N", "Y", "N", "N", "N", "N", "...
## $ mkioskty3  <chr> "Y", "Y", "N", "N", "N", "N", "N", "Y", "Y", "Y", "N", "...
## $ mkioskty4  <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "...
## $ mkioskty5  <chr> "Y", "Y", "N", "N", "N", "N", "N", "Y", "Y", "Y", "N", "...
## $ mkioskty6  <chr> "Y", "Y", "Y", "N", "N", "Y", "Y", "Y", "Y", "Y", "N", "...
## $ mkioskty7  <chr> "N", "Y", "N", "N", "N", "Y", "N", "Y", "N", "N", "N", "...
## $ mkioskty8  <chr> "Y", "Y", "N", "Y", "N", "Y", "N", "Y", "Y", "Y", "N", "...
## $ mkioskty9  <chr> "Y", "Y", "Y", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "...
## $ mkioskty10 <chr> "N", "Y", "N", "N", "N", "N", "N", "Y", "Y", "N", "N", "...
## $ mkioskty11 <chr> "Y", "N", "N", "N", "Y", "N", "N", "Y", "Y", "N", "N", "...
str(hpdata)
## tibble [313 x 36] (S3: tbl_df/tbl/data.frame)
##  $ dutyName.x: chr [1:313] "의료법인강릉동인병원" "강릉아산병원" "강원도삼척의료원" "강원도속초의료원" ...
##  $ hpid      : chr [1:313] "A2200005" "A2200008" "A2200007" "A2200012" ...
##  $ hv10      : chr [1:313] "0" "Y" "0" "0" ...
##  $ hv11      : chr [1:313] "0" "Y" "Y" "0" ...
##  $ hv2       : num [1:313] 6 0 0 0 0 0 0 1 0 0 ...
##  $ hv3       : num [1:313] 0 1 0 0 0 0 0 2 0 0 ...
##  $ hv4       : num [1:313] 0 0 0 0 0 0 0 0 0 0 ...
##  $ hv5       : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ hv6       : num [1:313] 0 1 0 0 0 0 0 1 1 0 ...
##  $ hv7       : chr [1:313] "Y" "Y" "0" "0" ...
##  $ hv8       : num [1:313] 0 0 0 0 0 0 0 0 0 0 ...
##  $ hv9       : num [1:313] 0 0 0 0 0 0 0 4 0 0 ...
##  $ hvamyn    : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ hvangioayn: chr [1:313] "N" "N" "N" "N" ...
##  $ hvcc      : num [1:313] 0 0 0 0 0 0 0 0 0 0 ...
##  $ hvccc     : num [1:313] 0 0 0 0 0 0 0 0 0 0 ...
##  $ hvctayn   : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ hvec      : num [1:313] 17 14 18 19 8 4 8 15 14 16 ...
##  $ hvgc      : num [1:313] 64 62 15 59 37 148 90 91 143 42 ...
##  $ hvicc     : num [1:313] 1 0 1 3 6 3 0 0 0 0 ...
##  $ hvmriayn  : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ hvncc     : num [1:313] 0 6 0 0 0 0 0 17 10 0 ...
##  $ hvoc      : num [1:313] 5 13 2 3 4 3 6 15 11 9 ...
##  $ hvventiayn: chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ mkioskty25: chr [1:313] "Y" "Y" "N" "Y" ...
##  $ mkioskty1 : chr [1:313] "N" "Y" "Y" "N" ...
##  $ mkioskty2 : chr [1:313] "N" "N" "N" "N" ...
##  $ mkioskty3 : chr [1:313] "Y" "Y" "N" "N" ...
##  $ mkioskty4 : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ mkioskty5 : chr [1:313] "Y" "Y" "N" "N" ...
##  $ mkioskty6 : chr [1:313] "Y" "Y" "Y" "N" ...
##  $ mkioskty7 : chr [1:313] "N" "Y" "N" "N" ...
##  $ mkioskty8 : chr [1:313] "Y" "Y" "N" "Y" ...
##  $ mkioskty9 : chr [1:313] "Y" "Y" "Y" "N" ...
##  $ mkioskty10: chr [1:313] "N" "Y" "N" "N" ...
##  $ mkioskty11: chr [1:313] "Y" "N" "N" "N" ...

length of unique values in each variable

nuniq <- c()
for(i in 1:length(colnames(hpdata))) {
  nuniq[i] <- hpdata[,i] %>%
  n_distinct()
}
nuniq
##  [1] 312 313   3   3  13  12   1   2   9   3   1  10   1   1   5   6   2  35 140
## [20]  18   3  21  29   2   2   2   2   2   2   2   2   2   2   2   2   2

remove columns with zero variance

hpdata <- hpdata[,nuniq!=1]
str(hpdata)
## tibble [313 x 32] (S3: tbl_df/tbl/data.frame)
##  $ dutyName.x: chr [1:313] "의료법인강릉동인병원" "강릉아산병원" "강원도삼척의료원" "강원도속초의료원" ...
##  $ hpid      : chr [1:313] "A2200005" "A2200008" "A2200007" "A2200012" ...
##  $ hv10      : chr [1:313] "0" "Y" "0" "0" ...
##  $ hv11      : chr [1:313] "0" "Y" "Y" "0" ...
##  $ hv2       : num [1:313] 6 0 0 0 0 0 0 1 0 0 ...
##  $ hv3       : num [1:313] 0 1 0 0 0 0 0 2 0 0 ...
##  $ hv5       : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ hv6       : num [1:313] 0 1 0 0 0 0 0 1 1 0 ...
##  $ hv7       : chr [1:313] "Y" "Y" "0" "0" ...
##  $ hv9       : num [1:313] 0 0 0 0 0 0 0 4 0 0 ...
##  $ hvcc      : num [1:313] 0 0 0 0 0 0 0 0 0 0 ...
##  $ hvccc     : num [1:313] 0 0 0 0 0 0 0 0 0 0 ...
##  $ hvctayn   : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ hvec      : num [1:313] 17 14 18 19 8 4 8 15 14 16 ...
##  $ hvgc      : num [1:313] 64 62 15 59 37 148 90 91 143 42 ...
##  $ hvicc     : num [1:313] 1 0 1 3 6 3 0 0 0 0 ...
##  $ hvmriayn  : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ hvncc     : num [1:313] 0 6 0 0 0 0 0 17 10 0 ...
##  $ hvoc      : num [1:313] 5 13 2 3 4 3 6 15 11 9 ...
##  $ hvventiayn: chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ mkioskty25: chr [1:313] "Y" "Y" "N" "Y" ...
##  $ mkioskty1 : chr [1:313] "N" "Y" "Y" "N" ...
##  $ mkioskty2 : chr [1:313] "N" "N" "N" "N" ...
##  $ mkioskty3 : chr [1:313] "Y" "Y" "N" "N" ...
##  $ mkioskty4 : chr [1:313] "Y" "Y" "Y" "Y" ...
##  $ mkioskty5 : chr [1:313] "Y" "Y" "N" "N" ...
##  $ mkioskty6 : chr [1:313] "Y" "Y" "Y" "N" ...
##  $ mkioskty7 : chr [1:313] "N" "Y" "N" "N" ...
##  $ mkioskty8 : chr [1:313] "Y" "Y" "N" "Y" ...
##  $ mkioskty9 : chr [1:313] "Y" "Y" "Y" "N" ...
##  $ mkioskty10: chr [1:313] "N" "Y" "N" "N" ...
##  $ mkioskty11: chr [1:313] "Y" "N" "N" "N" ...
nuniq <- c()
for(i in 1:length(colnames(hpdata))) {
  nuniq[i] <- hpdata[,i] %>%
  n_distinct()
}
nuniq
##  [1] 312 313   3   3  13  12   2   9   3  10   5   6   2  35 140  18   3  21  29
## [20]   2   2   2   2   2   2   2   2   2   2   2   2   2

devide table

hpdata_f <- hpdata[,nuniq<=3]
hpdata_n <- hpdata[,nuniq>3]

recoding

hpdata_f <- hpdata_f %>%
  mutate_all(funs(recode(., 'N1'=0L, '0'=0L, 'N'=0L, '1'=1L, 'Y'=1L, .default=1L)))
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
str(hpdata_f)
## tibble [313 x 19] (S3: tbl_df/tbl/data.frame)
##  $ hv10      : int [1:313] 0 1 0 0 0 0 0 1 1 1 ...
##  $ hv11      : int [1:313] 0 1 1 0 0 0 0 1 1 1 ...
##  $ hv5       : int [1:313] 1 1 1 1 1 1 1 1 1 1 ...
##  $ hv7       : int [1:313] 1 1 0 0 0 0 0 1 1 1 ...
##  $ hvctayn   : int [1:313] 1 1 1 1 1 1 1 1 1 1 ...
##  $ hvmriayn  : int [1:313] 1 1 1 1 1 1 1 1 1 1 ...
##  $ hvventiayn: int [1:313] 1 1 1 1 1 1 1 1 1 1 ...
##  $ mkioskty25: int [1:313] 1 1 0 1 1 0 1 1 1 1 ...
##  $ mkioskty1 : int [1:313] 0 1 1 0 0 0 0 1 1 0 ...
##  $ mkioskty2 : int [1:313] 0 0 0 0 0 0 1 0 0 0 ...
##  $ mkioskty3 : int [1:313] 1 1 0 0 0 0 0 1 1 1 ...
##  $ mkioskty4 : int [1:313] 1 1 1 1 1 1 1 1 1 1 ...
##  $ mkioskty5 : int [1:313] 1 1 0 0 0 0 0 1 1 1 ...
##  $ mkioskty6 : int [1:313] 1 1 1 0 0 1 1 1 1 1 ...
##  $ mkioskty7 : int [1:313] 0 1 0 0 0 1 0 1 0 0 ...
##  $ mkioskty8 : int [1:313] 1 1 0 1 0 1 0 1 1 1 ...
##  $ mkioskty9 : int [1:313] 1 1 1 0 0 0 0 1 1 1 ...
##  $ mkioskty10: int [1:313] 0 1 0 0 0 0 0 1 1 0 ...
##  $ mkioskty11: int [1:313] 1 0 0 0 1 0 0 1 1 0 ...

merge again

hpdata <- bind_cols(hpdata_n, hpdata_f)
glimpse(hpdata)
## Rows: 313
## Columns: 32
## $ dutyName.x <chr> "의료법인강릉동인병원", "강릉아산병원", "강원도삼척의료원", "강원도속초의료원", "의료법인보광의...
## $ hpid       <chr> "A2200005", "A2200008", "A2200007", "A2200012", "A220004...
## $ hv2        <dbl> 6, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0,...
## $ hv3        <dbl> 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 5, 10, 4, 0, 0...
## $ hv6        <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hv9        <dbl> 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hvcc       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hvccc      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ hvec       <dbl> 17, 14, 18, 19, 8, 4, 8, 15, 14, 16, 9, 10, 10, 10, 13, ...
## $ hvgc       <dbl> 64, 62, 15, 59, 37, 148, 90, 91, 143, 42, 73, 19, 30, 10...
## $ hvicc      <dbl> 1, 0, 1, 3, 6, 3, 0, 0, 0, 0, 11, 1, 0, 6, 0, 0, 0, 4, 4...
## $ hvncc      <dbl> 0, 6, 0, 0, 0, 0, 0, 17, 10, 0, 0, 0, 0, 3, 13, 4, -1, 0...
## $ hvoc       <dbl> 5, 13, 2, 3, 4, 3, 6, 15, 11, 9, 2, 2, 0, 8, 10, 2, 13, ...
## $ hv10       <int> 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0,...
## $ hv11       <int> 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0,...
## $ hv5        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ hv7        <int> 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,...
## $ hvctayn    <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ hvmriayn   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ hvventiayn <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ mkioskty25 <int> 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,...
## $ mkioskty1  <int> 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,...
## $ mkioskty2  <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,...
## $ mkioskty3  <int> 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,...
## $ mkioskty4  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ mkioskty5  <int> 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,...
## $ mkioskty6  <int> 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0,...
## $ mkioskty7  <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,...
## $ mkioskty8  <int> 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,...
## $ mkioskty9  <int> 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,...
## $ mkioskty10 <int> 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,...
## $ mkioskty11 <int> 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,...

centering and scaling

hpdata_z <- hpdata %>%
  mutate_each_(funs(scale), vars=colnames(hpdata)[3:32])
## Warning: `mutate_each_()` is deprecated as of dplyr 0.7.0.
## Please use `across()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
head(hpdata_z[,3:32])
## # A tibble: 6 x 30
##   hv2[,1] hv3[,1] hv6[,1] hv9[,1] hvcc[,1] hvccc[,1] hvec[,1] hvgc[,1] hvicc[,1]
##     <dbl>   <dbl>   <dbl>   <dbl>    <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1   3.12   -0.356  -0.235  -0.171   -0.149    -0.124    1.14    -0.364   -0.490 
## 2  -0.359   0.120   0.642  -0.171   -0.149    -0.124    0.654   -0.397   -0.762 
## 3  -0.359  -0.356  -0.235  -0.171   -0.149    -0.124    1.30    -1.19    -0.490 
## 4  -0.359  -0.356  -0.235  -0.171   -0.149    -0.124    1.46    -0.448    0.0548
## 5  -0.359  -0.356  -0.235  -0.171   -0.149    -0.124   -0.318   -0.819    0.871 
## 6  -0.359  -0.356  -0.235  -0.171   -0.149    -0.124   -0.967    1.05     0.0548
## # ... with 21 more variables: hvncc[,1] <dbl>, hvoc[,1] <dbl>, hv10[,1] <dbl>,
## #   hv11[,1] <dbl>, hv5[,1] <dbl>, hv7[,1] <dbl>, hvctayn[,1] <dbl>,
## #   hvmriayn[,1] <dbl>, hvventiayn[,1] <dbl>, mkioskty25[,1] <dbl>,
## #   mkioskty1[,1] <dbl>, mkioskty2[,1] <dbl>, mkioskty3[,1] <dbl>,
## #   mkioskty4[,1] <dbl>, mkioskty5[,1] <dbl>, mkioskty6[,1] <dbl>,
## #   mkioskty7[,1] <dbl>, mkioskty8[,1] <dbl>, mkioskty9[,1] <dbl>,
## #   mkioskty10[,1] <dbl>, mkioskty11[,1] <dbl>

2. PCA

Principal Component Analysis

hp_without_id <- hpdata_z[,3:32] %>%
    as.matrix()
hp_pca <- prcomp(hp_without_id)
hp_pca[[1]]
##  [1] 2.754002e+00 1.720755e+00 1.411599e+00 1.242049e+00 1.144886e+00
##  [6] 1.128249e+00 1.069838e+00 1.037569e+00 1.009868e+00 9.689509e-01
## [11] 9.242822e-01 9.118337e-01 8.872557e-01 8.452841e-01 8.235437e-01
## [16] 8.171788e-01 7.921716e-01 7.759205e-01 6.981109e-01 6.681701e-01
## [21] 6.511012e-01 6.362573e-01 5.868942e-01 5.730135e-01 5.228457e-01
## [26] 5.158809e-01 3.967430e-01 3.825128e-01 3.376772e-01 1.223023e-15
hp_pca[[2]][,1:3]
##                    PC1         PC2          PC3
## hv2        -0.18428779  0.13822699 -0.060154202
## hv3        -0.20384875  0.19991856 -0.086350699
## hv6        -0.17148933  0.14451215 -0.039068239
## hv9        -0.09808548  0.01830479  0.010927088
## hvcc       -0.09101464  0.14558902 -0.062165691
## hvccc      -0.12159677  0.10212124 -0.068971427
## hvec       -0.01194454 -0.23947804  0.036179058
## hvgc       -0.17597517  0.09361233 -0.139507068
## hvicc       0.04669590 -0.24742423  0.006235784
## hvncc      -0.26006542  0.15943534 -0.022511889
## hvoc       -0.27206005  0.10386183 -0.027060730
## hv10       -0.30026561  0.14313398 -0.006508880
## hv11       -0.28773147  0.13032689 -0.013353870
## hv5        -0.03921070 -0.18245036 -0.649749438
## hv7        -0.24181735 -0.12239585  0.050785418
## hvctayn    -0.03921070 -0.18245036 -0.649749438
## hvmriayn   -0.10320804 -0.20778141 -0.171891569
## hvventiayn -0.01475009  0.01723712  0.012073206
## mkioskty25 -0.14504399 -0.32545834  0.093917439
## mkioskty1  -0.30002855  0.09324527  0.035885149
## mkioskty2  -0.01240845 -0.17689169  0.072179790
## mkioskty3  -0.20985983 -0.27106174  0.126248272
## mkioskty4  -0.03275784 -0.08519503  0.009874346
## mkioskty5  -0.23969715 -0.20245868  0.115246037
## mkioskty6  -0.11951641 -0.33596619  0.099173657
## mkioskty7  -0.12034658 -0.09620336  0.051132989
## mkioskty8  -0.14749307 -0.29630323  0.082464088
## mkioskty9  -0.17809874 -0.26653048  0.117523007
## mkioskty10 -0.29958953  0.10095414  0.031058263
## mkioskty11 -0.23728115  0.05708709  0.033242411

variance explained: \(R^2\)

First conponent explains 25% of the total variance.

summary(hp_pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.7540 1.7208 1.41160 1.24205 1.14489 1.12825 1.06984
## Proportion of Variance 0.2528 0.0987 0.06642 0.05142 0.04369 0.04243 0.03815
## Cumulative Proportion  0.2528 0.3515 0.41794 0.46936 0.51305 0.55548 0.59364
##                            PC8     PC9   PC10    PC11    PC12    PC13    PC14
## Standard deviation     1.03757 1.00987 0.9690 0.92428 0.91183 0.88726 0.84528
## Proportion of Variance 0.03588 0.03399 0.0313 0.02848 0.02771 0.02624 0.02382
## Cumulative Proportion  0.62952 0.66352 0.6948 0.72329 0.75100 0.77724 0.80106
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.82354 0.81718 0.79217 0.77592 0.69811 0.66817 0.65110
## Proportion of Variance 0.02261 0.02226 0.02092 0.02007 0.01625 0.01488 0.01413
## Cumulative Proportion  0.82367 0.84593 0.86684 0.88691 0.90316 0.91804 0.93217
##                           PC22    PC23    PC24    PC25    PC26    PC27    PC28
## Standard deviation     0.63626 0.58689 0.57301 0.52285 0.51588 0.39674 0.38251
## Proportion of Variance 0.01349 0.01148 0.01094 0.00911 0.00887 0.00525 0.00488
## Cumulative Proportion  0.94567 0.95715 0.96809 0.97720 0.98608 0.99132 0.99620
##                          PC29      PC30
## Standard deviation     0.3377 1.223e-15
## Proportion of Variance 0.0038 0.000e+00
## Cumulative Proportion  1.0000 1.000e+00

scree plot

screeplot(hp_pca, main = "", col = "blue", type = "lines", pch = 21)

hospital score

hp_pc1 <- predict(hp_pca)[,1]
hp_score <- (100-20*hp_pc1)
hospital_score <- hpdata %>%
  select(dutyName.x,hpid)%>%
  mutate(score=hp_score)
skim(hospital_score)
Table 1: Data summary
Name hospital_score
Number of rows 313
Number of columns 3
_______________________
Column type frequency:
character 2
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
dutyName.x 0 1 3 23 0 312 0
hpid 0 1 8 8 0 313 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
score 0 1 100 55.08 -4.89 56.88 82.43 127.78 301.22 ▅▇▂▂▁

plotting hospital scores

library(ggplot2)
plot_score <- ggplot(hospital_score, aes(x=score))+
  geom_histogram(fill='sky blue', binwidth = 10)
plot_score

exporting dataset

write.csv(hospital_score, file = 'hospital_score')
<